diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/sord.c | 1299 | ||||
-rw-r--r-- | src/sord_internal.h | 52 | ||||
-rw-r--r-- | src/sord_test.c | 758 | ||||
-rw-r--r-- | src/sord_validate.c | 790 | ||||
-rw-r--r-- | src/sordi.c | 209 | ||||
-rw-r--r-- | src/sordmm_test.cpp | 25 | ||||
-rw-r--r-- | src/syntax.c | 207 | ||||
-rw-r--r-- | src/zix/btree.c | 740 | ||||
-rw-r--r-- | src/zix/btree.h | 155 | ||||
-rw-r--r-- | src/zix/common.h | 88 | ||||
-rw-r--r-- | src/zix/digest.c | 57 | ||||
-rw-r--r-- | src/zix/digest.h | 39 | ||||
-rw-r--r-- | src/zix/hash.c | 232 | ||||
-rw-r--r-- | src/zix/hash.h | 140 |
14 files changed, 4791 insertions, 0 deletions
diff --git a/src/sord.c b/src/sord.c new file mode 100644 index 0000000..c23b2c1 --- /dev/null +++ b/src/sord.c @@ -0,0 +1,1299 @@ +/* + Copyright 2011-2016 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +// C99 +#include <assert.h> +#include <errno.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define ZIX_INLINE +#include "zix/digest.c" +#include "zix/hash.c" +#include "zix/btree.c" + +#include "sord_config.h" +#include "sord_internal.h" + +#define SORD_LOG(prefix, ...) fprintf(stderr, "[Sord::" prefix "] " __VA_ARGS__) + +#ifdef SORD_DEBUG_ITER +# define SORD_ITER_LOG(...) SORD_LOG("iter", __VA_ARGS__) +#else +# define SORD_ITER_LOG(...) +#endif +#ifdef SORD_DEBUG_SEARCH +# define SORD_FIND_LOG(...) SORD_LOG("search", __VA_ARGS__) +#else +# define SORD_FIND_LOG(...) +#endif +#ifdef SORD_DEBUG_WRITE +# define SORD_WRITE_LOG(...) SORD_LOG("write", __VA_ARGS__) +#else +# define SORD_WRITE_LOG(...) +#endif + +#define NUM_ORDERS 12 +#define STATEMENT_LEN 3 +#define TUP_LEN (STATEMENT_LEN + 1) +#define DEFAULT_ORDER SPO +#define DEFAULT_GRAPH_ORDER GSPO + +#define TUP_FMT "(%s %s %s %s)" +#define TUP_FMT_ELEM(e) ((e) ? sord_node_get_string(e) : (const uint8_t*)"*") +#define TUP_FMT_ARGS(t) \ + TUP_FMT_ELEM((t)[0]), \ + TUP_FMT_ELEM((t)[1]), \ + TUP_FMT_ELEM((t)[2]), \ + TUP_FMT_ELEM((t)[3]) + +#define TUP_S 0 +#define TUP_P 1 +#define TUP_O 2 +#define TUP_G 3 + +/** Triple ordering */ +typedef enum { + SPO, ///< Subject, Predicate, Object + SOP, ///< Subject, Object, Predicate + OPS, ///< Object, Predicate, Subject + OSP, ///< Object, Subject, Predicate + PSO, ///< Predicate, Subject, Object + POS, ///< Predicate, Object, Subject + GSPO, ///< Graph, Subject, Predicate, Object + GSOP, ///< Graph, Subject, Object, Predicate + GOPS, ///< Graph, Object, Predicate, Subject + GOSP, ///< Graph, Object, Subject, Predicate + GPSO, ///< Graph, Predicate, Subject, Object + GPOS ///< Graph, Predicate, Object, Subject +} SordOrder; + +#ifdef SORD_DEBUG_SEARCH +/** String name of each ordering (array indexed by SordOrder) */ +static const char* const order_names[NUM_ORDERS] = { + "spo", "sop", "ops", "osp", "pso", "pos", + "gspo", "gsop", "gops", "gosp", "gpso", "gpos" +}; +#endif + +/** + Quads of indices for each order, from most to least significant + (array indexed by SordOrder) +*/ +static const int orderings[NUM_ORDERS][TUP_LEN] = { + { 0, 1, 2, 3 }, { 0, 2, 1, 3 }, // SPO, SOP + { 2, 1, 0, 3 }, { 2, 0, 1, 3 }, // OPS, OSP + { 1, 0, 2, 3 }, { 1, 2, 0, 3 }, // PSO, POS + { 3, 0, 1, 2 }, { 3, 0, 2, 1 }, // GSPO, GSOP + { 3, 2, 1, 0 }, { 3, 2, 0, 1 }, // GOPS, GOSP + { 3, 1, 0, 2 }, { 3, 1, 2, 0 } // GPSO, GPOS +}; + +/** World */ +struct SordWorldImpl { + ZixHash* nodes; + SerdErrorSink error_sink; + void* error_handle; +}; + +/** Store */ +struct SordModelImpl { + SordWorld* world; + + /** Index for each possible triple ordering (may or may not exist). + * Each index is a tree of SordQuad with the appropriate ordering. + */ + ZixBTree* indices[NUM_ORDERS]; + + size_t n_quads; + size_t n_iters; +}; + +/** Mode for searching or iteration */ +typedef enum { + ALL, ///< Iterate over entire store + SINGLE, ///< Iteration over a single element (exact search) + RANGE, ///< Iterate over range with equal prefix + FILTER_RANGE, ///< Iterate over range with equal prefix, filtering + FILTER_ALL ///< Iterate to end of store, filtering +} SearchMode; + +/** Iterator over some range of a store */ +struct SordIterImpl { + const SordModel* sord; ///< Model being iterated over + ZixBTreeIter* cur; ///< Current DB cursor + SordQuad pat; ///< Pattern (in ordering order) + SordOrder order; ///< Store order (which index) + SearchMode mode; ///< Iteration mode + int n_prefix; ///< Prefix for RANGE and FILTER_RANGE + bool end; ///< True iff reached end + bool skip_graphs; ///< Iteration should ignore graphs +}; + +static uint32_t +sord_node_hash(const void* n) +{ + const SordNode* node = (const SordNode*)n; + uint32_t hash = zix_digest_start(); + hash = zix_digest_add(hash, node->node.buf, node->node.n_bytes); + hash = zix_digest_add(hash, &node->node.type, sizeof(node->node.type)); + if (node->node.type == SERD_LITERAL) { + hash = zix_digest_add(hash, &node->meta.lit, sizeof(node->meta.lit)); + } + return hash; +} + +static bool +sord_node_hash_equal(const void* a, const void* b) +{ + const SordNode* a_node = (const SordNode*)a; + const SordNode* b_node = (const SordNode*)b; + return (a_node == b_node) + || ((a_node->node.type == b_node->node.type) && + (a_node->node.type != SERD_LITERAL || + (a_node->meta.lit.datatype == b_node->meta.lit.datatype && + !strncmp(a_node->meta.lit.lang, + b_node->meta.lit.lang, + sizeof(a_node->meta.lit.lang)))) && + (serd_node_equals(&a_node->node, &b_node->node))); +} + +static void +error(SordWorld* world, SerdStatus st, const char* fmt, ...) +{ + va_list args; + va_start(args, fmt); + const SerdError e = { st, NULL, 0, 0, fmt, &args }; + if (world->error_sink) { + world->error_sink(world->error_handle, &e); + } else { + fprintf(stderr, "error: "); + vfprintf(stderr, fmt, args); + } + va_end(args); +} + +SordWorld* +sord_world_new(void) +{ + SordWorld* world = (SordWorld*)malloc(sizeof(SordWorld)); + world->error_sink = NULL; + world->error_handle = NULL; + + world->nodes = zix_hash_new( + sord_node_hash, sord_node_hash_equal, sizeof(SordNode)); + + return world; +} + +static void +free_node_entry(void* value, void* user_data) +{ + SordNode* node = (SordNode*)value; + if (node->node.type == SERD_LITERAL) { + sord_node_free((SordWorld*)user_data, node->meta.lit.datatype); + } + free((uint8_t*)node->node.buf); +} + +void +sord_world_free(SordWorld* world) +{ + zix_hash_foreach(world->nodes, free_node_entry, world); + zix_hash_free(world->nodes); + free(world); +} + +void +sord_world_set_error_sink(SordWorld* world, + SerdErrorSink error_sink, + void* handle) +{ + world->error_sink = error_sink; + world->error_handle = handle; +} + +/** Compare nodes, considering NULL a wildcard match. */ +static inline int +sord_node_compare(const SordNode* a, const SordNode* b) +{ + if (a == b || !a || !b) { + return 0; // Exact or wildcard match + } else if (a->node.type != b->node.type) { + return a->node.type - b->node.type; + } + + int cmp = 0; + switch (a->node.type) { + case SERD_URI: + case SERD_BLANK: + return strcmp((const char*)a->node.buf, (const char*)b->node.buf); + case SERD_LITERAL: + cmp = strcmp((const char*)sord_node_get_string(a), + (const char*)sord_node_get_string(b)); + if (cmp == 0) { + // Note: Can't use sord_node_compare here since it does wildcards + if (!a->meta.lit.datatype || !b->meta.lit.datatype) { + cmp = a->meta.lit.datatype - b->meta.lit.datatype; + } else { + cmp = strcmp((const char*)a->meta.lit.datatype->node.buf, + (const char*)b->meta.lit.datatype->node.buf); + } + } + if (cmp == 0) { + cmp = strcmp(a->meta.lit.lang, b->meta.lit.lang); + } + default: + break; + } + return cmp; +} + +bool +sord_node_equals(const SordNode* a, const SordNode* b) +{ + return a == b; // Nodes are interned +} + +/** Return true iff IDs are equivalent, or one is a wildcard */ +static inline bool +sord_id_match(const SordNode* a, const SordNode* b) +{ + return !a || !b || (a == b); +} + +static inline bool +sord_quad_match_inline(const SordQuad x, const SordQuad y) +{ + return sord_id_match(x[0], y[0]) + && sord_id_match(x[1], y[1]) + && sord_id_match(x[2], y[2]) + && sord_id_match(x[3], y[3]); +} + +bool +sord_quad_match(const SordQuad x, const SordQuad y) +{ + return sord_quad_match_inline(x, y); +} + +/** + Compare two quad IDs lexicographically. + NULL IDs (equal to 0) are treated as wildcards, always less than every + other possible ID, except itself. +*/ +static int +sord_quad_compare(const void* x_ptr, const void* y_ptr, void* user_data) +{ + const int* const ordering = (const int*)user_data; + const SordNode*const*const x = (const SordNode*const*)x_ptr; + const SordNode*const*const y = (const SordNode*const*)y_ptr; + + for (int i = 0; i < TUP_LEN; ++i) { + const int idx = ordering[i]; + const int cmp = sord_node_compare(x[idx], y[idx]); + if (cmp) { + return cmp; + } + } + + return 0; +} + +static inline bool +sord_iter_forward(SordIter* iter) +{ + if (!iter->skip_graphs) { + zix_btree_iter_increment(iter->cur); + return zix_btree_iter_is_end(iter->cur); + } + + SordNode** key = (SordNode**)zix_btree_get(iter->cur); + const SordQuad initial = { key[0], key[1], key[2], key[3] }; + zix_btree_iter_increment(iter->cur); + while (!zix_btree_iter_is_end(iter->cur)) { + key = (SordNode**)zix_btree_get(iter->cur); + for (int i = 0; i < 3; ++i) { + if (key[i] != initial[i]) { + return false; + } + } + + zix_btree_iter_increment(iter->cur); + } + + return true; +} + +/** + Seek forward as necessary until `iter` points at a match. + @return true iff iterator reached end of valid range. +*/ +static inline bool +sord_iter_seek_match(SordIter* iter) +{ + for (iter->end = true; + !zix_btree_iter_is_end(iter->cur); + sord_iter_forward(iter)) { + const SordNode** const key = (const SordNode**)zix_btree_get(iter->cur); + if (sord_quad_match_inline(key, iter->pat)) { + return (iter->end = false); + } + } + return true; +} + +/** + Seek forward as necessary until `iter` points at a match, or the prefix + no longer matches iter->pat. + @return true iff iterator reached end of valid range. +*/ +static inline bool +sord_iter_seek_match_range(SordIter* iter) +{ + assert(!iter->end); + + do { + const SordNode** key = (const SordNode**)zix_btree_get(iter->cur); + + if (sord_quad_match_inline(key, iter->pat)) { + return false; // Found match + } + + for (int i = 0; i < iter->n_prefix; ++i) { + const int idx = orderings[iter->order][i]; + if (!sord_id_match(key[idx], iter->pat[idx])) { + iter->end = true; // Reached end of valid range + return true; + } + } + } while (!sord_iter_forward(iter)); + + return (iter->end = true); // Reached end +} + +static SordIter* +sord_iter_new(const SordModel* sord, ZixBTreeIter* cur, const SordQuad pat, + SordOrder order, SearchMode mode, int n_prefix) +{ + SordIter* iter = (SordIter*)malloc(sizeof(SordIter)); + iter->sord = sord; + iter->cur = cur; + iter->order = order; + iter->mode = mode; + iter->n_prefix = n_prefix; + iter->end = false; + iter->skip_graphs = order < GSPO; + for (int i = 0; i < TUP_LEN; ++i) { + iter->pat[i] = pat[i]; + } + + switch (iter->mode) { + case ALL: + case SINGLE: + case RANGE: + assert( + sord_quad_match_inline((const SordNode**)zix_btree_get(iter->cur), + iter->pat)); + break; + case FILTER_RANGE: + sord_iter_seek_match_range(iter); + break; + case FILTER_ALL: + sord_iter_seek_match(iter); + break; + } + +#ifdef SORD_DEBUG_ITER + SordQuad value; + sord_iter_get(iter, value); + SORD_ITER_LOG("New %p pat=" TUP_FMT " cur=" TUP_FMT " end=%d skip=%d\n", + (void*)iter, TUP_FMT_ARGS(pat), TUP_FMT_ARGS(value), + iter->end, iter->skip_graphs); +#endif + + ++((SordModel*)sord)->n_iters; + return iter; +} + +const SordModel* +sord_iter_get_model(SordIter* iter) +{ + return iter->sord; +} + +void +sord_iter_get(const SordIter* iter, SordQuad tup) +{ + SordNode** key = (SordNode**)zix_btree_get(iter->cur); + for (int i = 0; i < TUP_LEN; ++i) { + tup[i] = key[i]; + } +} + +const SordNode* +sord_iter_get_node(const SordIter* iter, SordQuadIndex index) +{ + return (!sord_iter_end(iter) + ? ((SordNode**)zix_btree_get(iter->cur))[index] + : NULL); +} + +static bool +sord_iter_scan_next(SordIter* iter) +{ + if (iter->end) { + return true; + } + + const SordNode** key; + if (!iter->end) { + switch (iter->mode) { + case ALL: + // At the end if the cursor is (assigned above) + break; + case SINGLE: + iter->end = true; + SORD_ITER_LOG("%p reached single end\n", (void*)iter); + break; + case RANGE: + SORD_ITER_LOG("%p range next\n", (void*)iter); + // At the end if the MSNs no longer match + key = (const SordNode**)zix_btree_get(iter->cur); + assert(key); + for (int i = 0; i < iter->n_prefix; ++i) { + const int idx = orderings[iter->order][i]; + if (!sord_id_match(key[idx], iter->pat[idx])) { + iter->end = true; + SORD_ITER_LOG("%p reached non-match end\n", (void*)iter); + break; + } + } + break; + case FILTER_RANGE: + // Seek forward to next match, stopping if prefix changes + sord_iter_seek_match_range(iter); + break; + case FILTER_ALL: + // Seek forward to next match + sord_iter_seek_match(iter); + break; + } + } else { + SORD_ITER_LOG("%p reached index end\n", (void*)iter); + } + + if (iter->end) { + SORD_ITER_LOG("%p Reached end\n", (void*)iter); + return true; + } else { +#ifdef SORD_DEBUG_ITER + SordQuad tup; + sord_iter_get(iter, tup); + SORD_ITER_LOG("%p Increment to " TUP_FMT "\n", + (void*)iter, TUP_FMT_ARGS(tup)); +#endif + return false; + } +} + +bool +sord_iter_next(SordIter* iter) +{ + if (iter->end) { + return true; + } + + iter->end = sord_iter_forward(iter); + return sord_iter_scan_next(iter); +} + +bool +sord_iter_end(const SordIter* iter) +{ + return !iter || iter->end; +} + +void +sord_iter_free(SordIter* iter) +{ + SORD_ITER_LOG("%p Free\n", (void*)iter); + if (iter) { + --((SordModel*)iter->sord)->n_iters; + zix_btree_iter_free(iter->cur); + free(iter); + } +} + +/** + Return true iff `sord` has an index for `order`. + If `graphs` is true, `order` will be modified to be the + corresponding order with a G prepended (so G will be the MSN). +*/ +static inline bool +sord_has_index(SordModel* model, SordOrder* order, int* n_prefix, bool graphs) +{ + if (graphs) { + *order = (SordOrder)(*order + GSPO); + *n_prefix += 1; + } + + return model->indices[*order]; +} + +/** + Return the best available index for a pattern. + @param pat Pattern in standard (S P O G) order + @param mode Set to the (best) iteration mode for iterating over results + @param n_prefix Set to the length of the range prefix + (for `mode` == RANGE and `mode` == FILTER_RANGE) +*/ +static inline SordOrder +sord_best_index(SordModel* sord, + const SordQuad pat, + SearchMode* mode, + int* n_prefix) +{ + const bool graph_search = (pat[TUP_G] != 0); + + const unsigned sig + = (pat[0] ? 1 : 0) * 0x100 + + (pat[1] ? 1 : 0) * 0x010 + + (pat[2] ? 1 : 0) * 0x001; + + SordOrder good[2] = { (SordOrder)-1, (SordOrder)-1 }; + +#define PAT_CASE(sig, m, g0, g1, np) \ + case sig: \ + *mode = m; \ + good[0] = g0; \ + good[1] = g1; \ + *n_prefix = np; \ + break + + // Good orderings that don't require filtering + *mode = RANGE; + *n_prefix = 0; + switch (sig) { + case 0x000: + assert(graph_search); + *mode = RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + case 0x111: + *mode = SINGLE; + return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER; + + PAT_CASE(0x001, RANGE, OPS, OSP, 1); + PAT_CASE(0x010, RANGE, POS, PSO, 1); + PAT_CASE(0x011, RANGE, OPS, POS, 2); + PAT_CASE(0x100, RANGE, SPO, SOP, 1); + PAT_CASE(0x101, RANGE, SOP, OSP, 2); + PAT_CASE(0x110, RANGE, SPO, PSO, 2); + } + + if (*mode == RANGE) { + if (sord_has_index(sord, &good[0], n_prefix, graph_search)) { + return good[0]; + } else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + // Not so good orderings that require filtering, but can + // still be constrained to a range + switch (sig) { + PAT_CASE(0x011, FILTER_RANGE, OSP, PSO, 1); + PAT_CASE(0x101, FILTER_RANGE, SPO, OPS, 1); + // SPO is always present, so 0x110 is never reached here + default: break; + } + + if (*mode == FILTER_RANGE) { + if (sord_has_index(sord, &good[0], n_prefix, graph_search)) { + return good[0]; + } else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + if (graph_search) { + *mode = FILTER_RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + } else { + *mode = FILTER_ALL; + return DEFAULT_ORDER; + } +} + +SordModel* +sord_new(SordWorld* world, unsigned indices, bool graphs) +{ + SordModel* model = (SordModel*)malloc(sizeof(struct SordModelImpl)); + model->world = world; + model->n_quads = 0; + model->n_iters = 0; + + for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) { + const int* const ordering = orderings[i]; + const int* const g_ordering = orderings[i + (NUM_ORDERS / 2)]; + + if (indices & (1 << i)) { + model->indices[i] = zix_btree_new( + sord_quad_compare, (void*)ordering, NULL); + if (graphs) { + model->indices[i + (NUM_ORDERS / 2)] = zix_btree_new( + sord_quad_compare, (void*)g_ordering, NULL); + } else { + model->indices[i + (NUM_ORDERS / 2)] = NULL; + } + } else { + model->indices[i] = NULL; + model->indices[i + (NUM_ORDERS / 2)] = NULL; + } + } + + if (!model->indices[DEFAULT_ORDER]) { + model->indices[DEFAULT_ORDER] = zix_btree_new( + sord_quad_compare, (void*)orderings[DEFAULT_ORDER], NULL); + } + if (graphs && !model->indices[DEFAULT_GRAPH_ORDER]) { + model->indices[DEFAULT_GRAPH_ORDER] = zix_btree_new( + sord_quad_compare, (void*)orderings[DEFAULT_GRAPH_ORDER], NULL); + } + + return model; +} + +static void +sord_node_free_internal(SordWorld* world, SordNode* node) +{ + assert(node->refs == 0); + + // Cache pointer to buffer to free after node removal and destruction + const uint8_t* const buf = node->node.buf; + + // Remove node from hash (which frees the node) + if (zix_hash_remove(world->nodes, node)) { + error(world, SERD_ERR_INTERNAL, "failed to remove node from hash\n"); + } + + // Free buffer + free((uint8_t*)buf); +} + +static void +sord_add_quad_ref(SordModel* model, const SordNode* node, SordQuadIndex i) +{ + if (node) { + assert(node->refs > 0); + ++((SordNode*)node)->refs; + if (node->node.type != SERD_LITERAL && i == SORD_OBJECT) { + ++((SordNode*)node)->meta.res.refs_as_obj; + } + } +} + +static void +sord_drop_quad_ref(SordModel* model, const SordNode* node, SordQuadIndex i) +{ + if (!node) { + return; + } + + assert(node->refs > 0); + if (node->node.type != SERD_LITERAL && i == SORD_OBJECT) { + assert(node->meta.res.refs_as_obj > 0); + --((SordNode*)node)->meta.res.refs_as_obj; + } + if (--((SordNode*)node)->refs == 0) { + sord_node_free_internal(sord_get_world(model), (SordNode*)node); + } +} + +void +sord_free(SordModel* model) +{ + if (!model) { + return; + } + + // Free nodes + SordQuad tup; + SordIter* i = sord_begin(model); + for (; !sord_iter_end(i); sord_iter_next(i)) { + sord_iter_get(i, tup); + for (int t = 0; t < TUP_LEN; ++t) { + sord_drop_quad_ref(model, tup[t], (SordQuadIndex)t); + } + } + sord_iter_free(i); + + // Free quads + ZixBTreeIter* t = zix_btree_begin(model->indices[DEFAULT_ORDER]); + for (; !zix_btree_iter_is_end(t); zix_btree_iter_increment(t)) { + free(zix_btree_get(t)); + } + zix_btree_iter_free(t); + + // Free indices + for (unsigned o = 0; o < NUM_ORDERS; ++o) { + if (model->indices[o]) { + zix_btree_free(model->indices[o]); + } + } + + free(model); +} + +SordWorld* +sord_get_world(SordModel* model) +{ + return model->world; +} + +size_t +sord_num_quads(const SordModel* model) +{ + return model->n_quads; +} + +size_t +sord_num_nodes(const SordWorld* world) +{ + return zix_hash_size(world->nodes); +} + +SordIter* +sord_begin(const SordModel* model) +{ + if (sord_num_quads(model) == 0) { + return NULL; + } else { + ZixBTreeIter* cur = zix_btree_begin(model->indices[DEFAULT_ORDER]); + SordQuad pat = { 0, 0, 0, 0 }; + return sord_iter_new(model, cur, pat, DEFAULT_ORDER, ALL, 0); + } +} + +SordIter* +sord_find(SordModel* model, const SordQuad pat) +{ + if (!pat[0] && !pat[1] && !pat[2] && !pat[3]) { + return sord_begin(model); + } + + SearchMode mode; + int n_prefix; + const SordOrder index_order = sord_best_index(model, pat, &mode, &n_prefix); + + SORD_FIND_LOG("Find " TUP_FMT " index=%s mode=%d n_prefix=%d\n", + TUP_FMT_ARGS(pat), order_names[index_order], mode, n_prefix); + + if (pat[0] && pat[1] && pat[2] && pat[3]) { + mode = SINGLE; // No duplicate quads (Sord is a set) + } + + ZixBTree* const db = model->indices[index_order]; + ZixBTreeIter* cur = NULL; + zix_btree_lower_bound(db, pat, &cur); + if (zix_btree_iter_is_end(cur)) { + SORD_FIND_LOG("No match found\n"); + zix_btree_iter_free(cur); + return NULL; + } + const SordNode** const key = (const SordNode**)zix_btree_get(cur); + if (!key || ( (mode == RANGE || mode == SINGLE) + && !sord_quad_match_inline(pat, key) )) { + SORD_FIND_LOG("No match found\n"); + zix_btree_iter_free(cur); + return NULL; + } + + return sord_iter_new(model, cur, pat, index_order, mode, n_prefix); +} + +SordIter* +sord_search(SordModel* model, + const SordNode* s, + const SordNode* p, + const SordNode* o, + const SordNode* g) +{ + SordQuad pat = { s, p, o, g }; + return sord_find(model, pat); +} + +SordNode* +sord_get(SordModel* model, + const SordNode* s, + const SordNode* p, + const SordNode* o, + const SordNode* g) +{ + if ((bool)s + (bool)p + (bool)o != 2) { + return NULL; + } + + SordIter* i = sord_search(model, s, p, o, g); + SordNode* ret = NULL; + if (!s) { + ret = sord_node_copy(sord_iter_get_node(i, SORD_SUBJECT)); + } else if (!p) { + ret = sord_node_copy(sord_iter_get_node(i, SORD_PREDICATE)); + } else if (!o) { + ret = sord_node_copy(sord_iter_get_node(i, SORD_OBJECT)); + } + + sord_iter_free(i); + return ret; +} + +bool +sord_ask(SordModel* model, + const SordNode* s, + const SordNode* p, + const SordNode* o, + const SordNode* g) +{ + SordQuad pat = { s, p, o, g }; + return sord_contains(model, pat); +} + +uint64_t +sord_count(SordModel* model, + const SordNode* s, + const SordNode* p, + const SordNode* o, + const SordNode* g) +{ + SordIter* i = sord_search(model, s, p, o, g); + uint64_t n = 0; + for (; !sord_iter_end(i); sord_iter_next(i)) { + ++n; + } + sord_iter_free(i); + return n; +} + +bool +sord_contains(SordModel* model, const SordQuad pat) +{ + SordIter* iter = sord_find(model, pat); + bool ret = (iter != NULL); + sord_iter_free(iter); + return ret; +} + +static uint8_t* +sord_strndup(const uint8_t* str, size_t len) +{ + uint8_t* dup = (uint8_t*)malloc(len + 1); + memcpy(dup, str, len + 1); + return dup; +} + +SordNodeType +sord_node_get_type(const SordNode* node) +{ + switch (node->node.type) { + case SERD_URI: + return SORD_URI; + case SERD_BLANK: + return SORD_BLANK; + default: + return SORD_LITERAL; + } + SORD_UNREACHABLE(); +} + +const uint8_t* +sord_node_get_string(const SordNode* node) +{ + return node->node.buf; +} + +const uint8_t* +sord_node_get_string_counted(const SordNode* node, size_t* bytes) +{ + *bytes = node->node.n_bytes; + return node->node.buf; +} + +const uint8_t* +sord_node_get_string_measured(const SordNode* node, + size_t* bytes, + size_t* chars) +{ + *bytes = node->node.n_bytes; + *chars = node->node.n_chars; + return node->node.buf; +} + +const char* +sord_node_get_language(const SordNode* node) +{ + if (node->node.type != SERD_LITERAL || !node->meta.lit.lang[0]) { + return NULL; + } + return node->meta.lit.lang; +} + +SordNode* +sord_node_get_datatype(const SordNode* node) +{ + return (node->node.type == SERD_LITERAL) ? node->meta.lit.datatype : NULL; +} + +SerdNodeFlags +sord_node_get_flags(const SordNode* node) +{ + return node->node.flags; +} + +bool +sord_node_is_inline_object(const SordNode* node) +{ + return (node->node.type == SERD_BLANK) && (node->meta.res.refs_as_obj == 1); +} + +static SordNode* +sord_insert_node(SordWorld* world, const SordNode* key, bool copy) +{ + SordNode* node = NULL; + ZixStatus st = zix_hash_insert(world->nodes, key, (const void**)&node); + switch (st) { + case ZIX_STATUS_EXISTS: + ++node->refs; + break; + case ZIX_STATUS_SUCCESS: + assert(node->refs == 1); + if (copy) { + node->node.buf = sord_strndup(node->node.buf, node->node.n_bytes); + } + if (node->node.type == SERD_LITERAL) { + node->meta.lit.datatype = sord_node_copy(node->meta.lit.datatype); + } + return node; + default: + error(world, SERD_ERR_INTERNAL, + "error inserting node `%s'\n", key->node.buf); + } + + if (!copy) { + // Free the buffer we would have copied if a new node was created + free((uint8_t*)key->node.buf); + } + + return node; +} + +static SordNode* +sord_new_uri_counted(SordWorld* world, const uint8_t* str, + size_t n_bytes, size_t n_chars, bool copy) +{ + if (!serd_uri_string_has_scheme(str)) { + error(world, SERD_ERR_BAD_ARG, + "attempt to map invalid URI `%s'\n", str); + return NULL; // Can't intern relative URIs + } + + const SordNode key = { + { str, n_bytes, n_chars, 0, SERD_URI }, 1, { { 0 } } + }; + + return sord_insert_node(world, &key, copy); +} + +SordNode* +sord_new_uri(SordWorld* world, const uint8_t* uri) +{ + const SerdNode node = serd_node_from_string(SERD_URI, uri); + return sord_new_uri_counted(world, uri, node.n_bytes, node.n_chars, true); +} + +SordNode* +sord_new_relative_uri(SordWorld* world, + const uint8_t* uri, + const uint8_t* base_uri) +{ + if (serd_uri_string_has_scheme(uri)) { + return sord_new_uri(world, uri); + } + SerdURI buri = SERD_URI_NULL; + SerdNode base = serd_node_new_uri_from_string(base_uri, NULL, &buri); + SerdNode node = serd_node_new_uri_from_string(uri, &buri, NULL); + + SordNode* ret = sord_new_uri_counted( + world, node.buf, node.n_bytes, node.n_chars, false); + + serd_node_free(&base); + return ret; +} + +static SordNode* +sord_new_blank_counted(SordWorld* world, const uint8_t* str, + size_t n_bytes, size_t n_chars) +{ + const SordNode key = { + { str, n_bytes, n_chars, 0, SERD_BLANK }, 1, { { 0 } } + }; + + return sord_insert_node(world, &key, true); +} + +SordNode* +sord_new_blank(SordWorld* world, const uint8_t* str) +{ + const SerdNode node = serd_node_from_string(SERD_URI, str); + return sord_new_blank_counted(world, str, node.n_bytes, node.n_chars); +} + +static SordNode* +sord_new_literal_counted(SordWorld* world, + SordNode* datatype, + const uint8_t* str, + size_t n_bytes, + size_t n_chars, + SerdNodeFlags flags, + const char* lang) +{ + SordNode key = { + { str, n_bytes, n_chars, flags, SERD_LITERAL }, 1, { { 0 } } + }; + key.meta.lit.datatype = sord_node_copy(datatype); + memset(key.meta.lit.lang, 0, sizeof(key.meta.lit.lang)); + if (lang) { + strncpy(key.meta.lit.lang, lang, sizeof(key.meta.lit.lang)); + } + + return sord_insert_node(world, &key, true); +} + +SordNode* +sord_new_literal(SordWorld* world, SordNode* datatype, + const uint8_t* str, const char* lang) +{ + SerdNodeFlags flags = 0; + size_t n_bytes = 0; + size_t n_chars = serd_strlen(str, &n_bytes, &flags); + return sord_new_literal_counted(world, datatype, + str, n_bytes, n_chars, flags, + lang); +} + +SordNode* +sord_node_from_serd_node(SordWorld* world, + SerdEnv* env, + const SerdNode* node, + const SerdNode* datatype, + const SerdNode* lang) +{ + if (!node) { + return NULL; + } + + SordNode* datatype_node = NULL; + SordNode* ret = NULL; + switch (node->type) { + case SERD_NOTHING: + return NULL; + case SERD_LITERAL: + datatype_node = sord_node_from_serd_node( + world, env, datatype, NULL, NULL), + ret = sord_new_literal_counted( + world, + datatype_node, + node->buf, + node->n_bytes, + node->n_chars, + node->flags, + lang ? (const char*)lang->buf : NULL); + sord_node_free(world, datatype_node); + return ret; + case SERD_URI: + if (serd_uri_string_has_scheme(node->buf)) { + return sord_new_uri_counted( + world, node->buf, node->n_bytes, node->n_chars, true); + } else { + SerdURI base_uri; + serd_env_get_base_uri(env, &base_uri); + SerdURI abs_uri; + SerdNode abs_uri_node = serd_node_new_uri_from_node( + node, &base_uri, &abs_uri); + ret = sord_new_uri_counted(world, + abs_uri_node.buf, + abs_uri_node.n_bytes, + abs_uri_node.n_chars, + true); + serd_node_free(&abs_uri_node); + return ret; + } + case SERD_CURIE: { + SerdChunk uri_prefix; + SerdChunk uri_suffix; + if (serd_env_expand(env, node, &uri_prefix, &uri_suffix)) { + error(world, SERD_ERR_BAD_CURIE, + "failed to expand CURIE `%s'\n", node->buf); + return NULL; + } + const size_t uri_len = uri_prefix.len + uri_suffix.len; + uint8_t* buf = (uint8_t*)malloc(uri_len + 1); + memcpy(buf, uri_prefix.buf, uri_prefix.len); + memcpy(buf + uri_prefix.len, uri_suffix.buf, uri_suffix.len); + buf[uri_len] = '\0'; + ret = sord_new_uri_counted( + world, buf, uri_len, serd_strlen(buf, NULL, NULL), false); + return ret; + } + case SERD_BLANK: + return sord_new_blank_counted( + world, node->buf, node->n_bytes, node->n_chars); + } + return NULL; +} + +const SerdNode* +sord_node_to_serd_node(const SordNode* node) +{ + return node ? &node->node : &SERD_NODE_NULL; +} + +void +sord_node_free(SordWorld* world, SordNode* node) +{ + if (!node) { + return; + } else if (node->refs == 0) { + error(world, SERD_ERR_BAD_ARG, "attempt to free garbage node\n"); + } else if (--node->refs == 0) { + sord_node_free_internal(world, node); + } +} + +SordNode* +sord_node_copy(const SordNode* node) +{ + SordNode* copy = (SordNode*)node; + if (copy) { + ++copy->refs; + } + return copy; +} + +static inline bool +sord_add_to_index(SordModel* model, const SordNode** tup, SordOrder order) +{ + return !zix_btree_insert(model->indices[order], tup); +} + +bool +sord_add(SordModel* model, const SordQuad tup) +{ + SORD_WRITE_LOG("Add " TUP_FMT "\n", TUP_FMT_ARGS(tup)); + if (!tup[0] || !tup[1] || !tup[2]) { + error(model->world, SERD_ERR_BAD_ARG, + "attempt to add quad with NULL field\n"); + return false; + } else if (model->n_iters > 0) { + error(model->world, SERD_ERR_BAD_ARG, "added tuple during iteration\n"); + } + + const SordNode** quad = (const SordNode**)malloc(sizeof(SordQuad)); + memcpy(quad, tup, sizeof(SordQuad)); + + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (model->indices[i] && (i < GSPO || tup[3])) { + if (!sord_add_to_index(model, quad, (SordOrder)i)) { + assert(i == 0); // Assuming index coherency + free(quad); + return false; // Quad already stored, do nothing + } + } + } + + for (int i = 0; i < TUP_LEN; ++i) { + sord_add_quad_ref(model, tup[i], (SordQuadIndex)i); + } + + ++model->n_quads; + return true; +} + +void +sord_remove(SordModel* model, const SordQuad tup) +{ + SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup)); + if (model->n_iters > 0) { + error(model->world, SERD_ERR_BAD_ARG, "remove with iterator\n"); + } + + SordNode* quad = NULL; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (model->indices[i] && (i < GSPO || tup[3])) { + if (zix_btree_remove(model->indices[i], tup, (void**)&quad, NULL)) { + assert(i == 0); // Assuming index coherency + return; // Quad not found, do nothing + } + } + } + + free(quad); + + for (int i = 0; i < TUP_LEN; ++i) { + sord_drop_quad_ref(model, tup[i], (SordQuadIndex)i); + } + + --model->n_quads; +} + +SerdStatus +sord_erase(SordModel* model, SordIter* iter) +{ + if (model->n_iters > 1) { + error(model->world, SERD_ERR_BAD_ARG, "erased with many iterators\n"); + return SERD_ERR_BAD_ARG; + } + + SordQuad tup; + sord_iter_get(iter, tup); + + SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup)); + + SordNode* quad = NULL; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (model->indices[i] && (i < GSPO || tup[3])) { + if (zix_btree_remove(model->indices[i], tup, (void**)&quad, + i == iter->order ? &iter->cur : NULL)) { + return (i == 0) ? SERD_ERR_NOT_FOUND : SERD_ERR_INTERNAL; + } + } + } + iter->end = zix_btree_iter_is_end(iter->cur); + sord_iter_scan_next(iter); + + free(quad); + + for (int i = 0; i < TUP_LEN; ++i) { + sord_drop_quad_ref(model, tup[i], (SordQuadIndex)i); + } + + --model->n_quads; + return SERD_SUCCESS; +} diff --git a/src/sord_internal.h b/src/sord_internal.h new file mode 100644 index 0000000..bfe98f6 --- /dev/null +++ b/src/sord_internal.h @@ -0,0 +1,52 @@ +/* + Copyright 2011-2015 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SORD_SORD_INTERNAL_H +#define SORD_SORD_INTERNAL_H + +#include <stddef.h> +#include <stdint.h> + +#include "sord/sord.h" + +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) +# define SORD_UNREACHABLE() __builtin_unreachable() +#else +# define SORD_UNREACHABLE() assert(false) +#endif + +/** Resource node metadata */ +typedef struct { + size_t refs_as_obj; ///< References as a quad object +} SordResourceMetadata; + +/** Literal node metadata */ +typedef struct { + SordNode* datatype; ///< Optional literal data type URI + char lang[16]; ///< Optional language tag +} SordLiteralMetadata; + +/** Node */ +struct SordNodeImpl { + SerdNode node; ///< Serd node + size_t refs; ///< Reference count (# of containing quads) + union { + SordResourceMetadata res; + SordLiteralMetadata lit; + } meta; +}; + +#endif /* SORD_SORD_INTERNAL_H */ diff --git a/src/sord_test.c b/src/sord_test.c new file mode 100644 index 0000000..1ab350a --- /dev/null +++ b/src/sord_test.c @@ -0,0 +1,758 @@ +/* + Copyright 2011-2016 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "sord/sord.h" + +static const int DIGITS = 3; +static const unsigned n_objects_per = 2; + +static int n_expected_errors = 0; + +typedef struct { + SordQuad query; + int expected_num_results; +} QueryTest; + +#define USTR(s) ((const uint8_t*)(s)) + +static SordNode* +uri(SordWorld* world, int num) +{ + if (num == 0) { + return 0; + } + + char str[] = "eg:000"; + char* uri_num = str + 3; // First `0' + snprintf(uri_num, DIGITS + 1, "%0*d", DIGITS, num); + return sord_new_uri(world, (const uint8_t*)str); +} + +static int +test_fail(const char* fmt, ...) +{ + va_list args; + va_start(args, fmt); + fprintf(stderr, "error: "); + vfprintf(stderr, fmt, args); + va_end(args); + return 1; +} + +static int +generate(SordWorld* world, + SordModel* sord, + size_t n_quads, + SordNode* graph) +{ + fprintf(stderr, "Generating %zu (S P *) quads with %u objects each\n", + n_quads, n_objects_per); + + for (size_t i = 0; i < n_quads; ++i) { + int num = (i * n_objects_per) + 1; + + SordNode* ids[2 + n_objects_per]; + for (unsigned j = 0; j < 2 + n_objects_per; ++j) { + ids[j] = uri(world, num++); + } + + for (unsigned j = 0; j < n_objects_per; ++j) { + SordQuad tup = { ids[0], ids[1], ids[2 + j], graph }; + if (!sord_add(sord, tup)) { + return test_fail("Fail: Failed to add quad\n"); + } + } + + for (unsigned j = 0; j < 2 + n_objects_per; ++j) { + sord_node_free(world, ids[j]); + } + } + + // Add some literals + + // (98 4 "hello") and (98 4 "hello"^^<5>) + SordQuad tup = { 0, 0, 0, 0 }; + tup[0] = uri(world, 98); + tup[1] = uri(world, 4); + tup[2] = sord_new_literal(world, 0, USTR("hello"), NULL); + tup[3] = graph; + sord_add(sord, tup); + sord_node_free(world, (SordNode*)tup[2]); + tup[2] = sord_new_literal(world, uri(world, 5), USTR("hello"), NULL); + if (!sord_add(sord, tup)) { + return test_fail("Failed to add typed literal\n"); + } + + // (96 4 "hello"^^<4>) and (96 4 "hello"^^<5>) + tup[0] = uri(world, 96); + tup[1] = uri(world, 4); + tup[2] = sord_new_literal(world, uri(world, 4), USTR("hello"), NULL); + tup[3] = graph; + sord_add(sord, tup); + sord_node_free(world, (SordNode*)tup[2]); + tup[2] = sord_new_literal(world, uri(world, 5), USTR("hello"), NULL); + if (!sord_add(sord, tup)) { + return test_fail("Failed to add typed literal\n"); + } + + // (94 5 "hello") and (94 5 "hello"@en-gb) + tup[0] = uri(world, 94); + tup[1] = uri(world, 5); + tup[2] = sord_new_literal(world, 0, USTR("hello"), NULL); + tup[3] = graph; + sord_add(sord, tup); + sord_node_free(world, (SordNode*)tup[2]); + tup[2] = sord_new_literal(world, NULL, USTR("hello"), "en-gb"); + if (!sord_add(sord, tup)) { + return test_fail("Failed to add literal with language\n"); + } + + // (92 6 "hello"@en-us) and (92 5 "hello"@en-gb) + tup[0] = uri(world, 92); + tup[1] = uri(world, 6); + tup[2] = sord_new_literal(world, 0, USTR("hello"), "en-us"); + tup[3] = graph; + sord_add(sord, tup); + sord_node_free(world, (SordNode*)tup[2]); + tup[2] = sord_new_literal(world, NULL, USTR("hello"), "en-gb"); + if (!sord_add(sord, tup)) { + return test_fail("Failed to add literal with language\n"); + } + + sord_node_free(world, (SordNode*)tup[0]); + sord_node_free(world, (SordNode*)tup[2]); + tup[0] = uri(world, 14); + tup[2] = sord_new_literal(world, 0, USTR("bonjour"), "fr"); + sord_add(sord, tup); + sord_node_free(world, (SordNode*)tup[2]); + tup[2] = sord_new_literal(world, 0, USTR("salut"), "fr"); + sord_add(sord, tup); + + // Attempt to add some duplicates + if (sord_add(sord, tup)) { + return test_fail("Fail: Successfully added duplicate quad\n"); + } + if (sord_add(sord, tup)) { + return test_fail("Fail: Successfully added duplicate quad\n"); + } + + // Add a blank node subject + sord_node_free(world, (SordNode*)tup[0]); + tup[0] = sord_new_blank(world, USTR("ablank")); + sord_add(sord, tup); + + sord_node_free(world, (SordNode*)tup[1]); + sord_node_free(world, (SordNode*)tup[2]); + tup[1] = uri(world, 6); + tup[2] = uri(world, 7); + sord_add(sord, tup); + sord_node_free(world, (SordNode*)tup[0]); + sord_node_free(world, (SordNode*)tup[1]); + sord_node_free(world, (SordNode*)tup[2]); + + return EXIT_SUCCESS; +} + +#define TUP_FMT "(%6s %6s %6s)" +#define TUP_FMT_ARGS(t) \ + ((t)[0] ? sord_node_get_string((t)[0]) : USTR("*")), \ + ((t)[1] ? sord_node_get_string((t)[1]) : USTR("*")), \ + ((t)[2] ? sord_node_get_string((t)[2]) : USTR("*")) + +static int +test_read(SordWorld* world, SordModel* sord, SordNode* g, + const size_t n_quads) +{ + int ret = EXIT_SUCCESS; + + SordQuad id; + + SordIter* iter = sord_begin(sord); + if (sord_iter_get_model(iter) != sord) { + return test_fail("Fail: Iterator has incorrect sord pointer\n"); + } + + for (; !sord_iter_end(iter); sord_iter_next(iter)) { + sord_iter_get(iter, id); + } + + // Attempt to increment past end + if (!sord_iter_next(iter)) { + return test_fail("Fail: Successfully incremented past end\n"); + } + + sord_iter_free(iter); + + const uint8_t* s = USTR("hello"); + SordNode* plain_hello = sord_new_literal(world, 0, s, NULL); + SordNode* type4_hello = sord_new_literal(world, uri(world, 4), s, NULL); + SordNode* type5_hello = sord_new_literal(world, uri(world, 5), s, NULL); + SordNode* gb_hello = sord_new_literal(world, NULL, s, "en-gb"); + SordNode* us_hello = sord_new_literal(world, NULL, s, "en-us"); + +#define NUM_PATTERNS 18 + + QueryTest patterns[NUM_PATTERNS] = { + { { 0, 0, 0 }, (int)(n_quads * n_objects_per) + 12 }, + { { uri(world, 1), 0, 0 }, 2 }, + { { uri(world, 9), uri(world, 9), uri(world, 9) }, 0 }, + { { uri(world, 1), uri(world, 2), uri(world, 4) }, 1 }, + { { uri(world, 3), uri(world, 4), uri(world, 0) }, 2 }, + { { uri(world, 0), uri(world, 2), uri(world, 4) }, 1 }, + { { uri(world, 0), uri(world, 0), uri(world, 4) }, 1 }, + { { uri(world, 1), uri(world, 0), uri(world, 0) }, 2 }, + { { uri(world, 1), uri(world, 0), uri(world, 4) }, 1 }, + { { uri(world, 0), uri(world, 2), uri(world, 0) }, 2 }, + { { uri(world, 98), uri(world, 4), plain_hello }, 1 }, + { { uri(world, 98), uri(world, 4), type5_hello }, 1 }, + { { uri(world, 96), uri(world, 4), type4_hello }, 1 }, + { { uri(world, 96), uri(world, 4), type5_hello }, 1 }, + { { uri(world, 94), uri(world, 5), plain_hello }, 1 }, + { { uri(world, 94), uri(world, 5), gb_hello }, 1 }, + { { uri(world, 92), uri(world, 6), gb_hello }, 1 }, + { { uri(world, 92), uri(world, 6), us_hello }, 1 } }; + + SordQuad match = { uri(world, 1), uri(world, 2), uri(world, 4), g }; + if (!sord_contains(sord, match)) { + return test_fail("Fail: No match for " TUP_FMT "\n", + TUP_FMT_ARGS(match)); + } + + SordQuad nomatch = { uri(world, 1), uri(world, 2), uri(world, 9), g }; + if (sord_contains(sord, nomatch)) { + return test_fail("Fail: False match for " TUP_FMT "\n", + TUP_FMT_ARGS(nomatch)); + } + + if (sord_get(sord, NULL, NULL, uri(world, 3), g)) { + return test_fail("Fail: Get *,*,3 succeeded\n"); + } else if (!sord_node_equals( + sord_get(sord, uri(world, 1), uri(world, 2), NULL, g), + uri(world, 3))) { + return test_fail("Fail: Get 1,2,* != 3\n"); + } else if (!sord_node_equals( + sord_get(sord, uri(world, 1), NULL, uri(world, 3), g), + uri(world, 2))) { + return test_fail("Fail: Get 1,*,3 != 2\n"); + } else if (!sord_node_equals( + sord_get(sord, NULL, uri(world, 2), uri(world, 3), g), + uri(world, 1))) { + return test_fail("Fail: Get *,2,3 != 1\n"); + } + + for (unsigned i = 0; i < NUM_PATTERNS; ++i) { + QueryTest test = patterns[i]; + SordQuad pat = { test.query[0], test.query[1], test.query[2], g }; + fprintf(stderr, "Query " TUP_FMT "... ", TUP_FMT_ARGS(pat)); + + iter = sord_find(sord, pat); + int num_results = 0; + for (; !sord_iter_end(iter); sord_iter_next(iter)) { + sord_iter_get(iter, id); + ++num_results; + if (!sord_quad_match(pat, id)) { + sord_iter_free(iter); + return test_fail( + "Fail: Query result " TUP_FMT " does not match pattern\n", + TUP_FMT_ARGS(id)); + } + } + sord_iter_free(iter); + if (num_results != test.expected_num_results) { + return test_fail("Fail: Expected %d results, got %d\n", + test.expected_num_results, num_results); + } + fprintf(stderr, "OK (%u matches)\n", test.expected_num_results); + } + + // Query blank node subject + SordQuad pat = { sord_new_blank(world, USTR("ablank")), 0, 0 }; + if (!pat[0]) { + return test_fail("Blank node subject lost\n"); + } + fprintf(stderr, "Query " TUP_FMT "... ", TUP_FMT_ARGS(pat)); + iter = sord_find(sord, pat); + int num_results = 0; + for (; !sord_iter_end(iter); sord_iter_next(iter)) { + sord_iter_get(iter, id); + ++num_results; + if (!sord_quad_match(pat, id)) { + sord_iter_free(iter); + return test_fail( + "Fail: Query result " TUP_FMT " does not match pattern\n", + TUP_FMT_ARGS(id)); + } + } + fprintf(stderr, "OK\n"); + sord_node_free(world, (SordNode*)pat[0]); + sord_iter_free(iter); + if (num_results != 2) { + return test_fail("Blank node subject query failed\n"); + } + + // Test nested queries + fprintf(stderr, "Nested Queries... "); + const SordNode* last_subject = 0; + iter = sord_search(sord, NULL, NULL, NULL, NULL); + for (; !sord_iter_end(iter); sord_iter_next(iter)) { + sord_iter_get(iter, id); + if (id[0] == last_subject) { + continue; + } + + SordQuad subpat = { id[0], 0, 0 }; + SordIter* subiter = sord_find(sord, subpat); + uint64_t num_sub_results = 0; + if (sord_iter_get_node(subiter, SORD_SUBJECT) != id[0]) { + return test_fail("Fail: Incorrect initial submatch\n"); + } + for (; !sord_iter_end(subiter); sord_iter_next(subiter)) { + SordQuad subid; + sord_iter_get(subiter, subid); + if (!sord_quad_match(subpat, subid)) { + sord_iter_free(iter); + sord_iter_free(subiter); + return test_fail( + "Fail: Nested query result does not match pattern\n"); + } + ++num_sub_results; + } + sord_iter_free(subiter); + if (num_sub_results != n_objects_per) { + return test_fail( + "Fail: Nested query " TUP_FMT " failed" + " (%d results, expected %d)\n", + TUP_FMT_ARGS(subpat), num_sub_results, n_objects_per); + } + + uint64_t count = sord_count(sord, id[0], 0, 0, 0); + if (count != num_sub_results) { + return test_fail("Fail: Query " TUP_FMT " sord_count() %d" + "does not match result count %d\n", + TUP_FMT_ARGS(subpat), count, num_sub_results); + } + + last_subject = id[0]; + } + fprintf(stderr, "OK\n\n"); + sord_iter_free(iter); + + return ret; +} + +static SerdStatus +unexpected_error(void* handle, const SerdError* error) +{ + fprintf(stderr, "unexpected error: "); + vfprintf(stderr, error->fmt, *error->args); + return SERD_SUCCESS; +} + +static SerdStatus +expected_error(void* handle, const SerdError* error) +{ + fprintf(stderr, "expected error: "); + vfprintf(stderr, error->fmt, *error->args); + ++n_expected_errors; + return SERD_SUCCESS; +} + +static int +finished(SordWorld* world, SordModel* sord, int status) +{ + sord_free(sord); + sord_world_free(world); + return status; +} + +int +main(int argc, char** argv) +{ + static const size_t n_quads = 300; + + sord_free(NULL); // Shouldn't crash + + SordWorld* world = sord_world_new(); + + + // Attempt to create invalid URI + fprintf(stderr, "expected "); + SordNode* bad_uri = sord_new_uri(world, USTR("noscheme")); + if (bad_uri) { + return test_fail("Successfully created invalid URI \"noscheme\"\n"); + } + sord_node_free(world, bad_uri); + + sord_world_set_error_sink(world, expected_error, NULL); + + // Attempt to create invalid CURIE + SerdNode base = serd_node_from_string(SERD_URI, USTR("http://example.org/")); + SerdEnv* env = serd_env_new(&base); + SerdNode sbad = serd_node_from_string(SERD_CURIE, USTR("bad:")); + SordNode* bad = sord_node_from_serd_node(world, env, &sbad, NULL, NULL); + if (bad) { + return test_fail("Successfully created CURIE with bad namespace\n"); + } + sord_node_free(world, bad); + serd_env_free(env); + + // Attempt to create node from garbage + SerdNode junk = SERD_NODE_NULL; + junk.type = (SerdType)1234; + if (sord_node_from_serd_node(world, env, &junk, NULL, NULL)) { + return test_fail("Successfully created node from garbage serd node\n"); + } + + // Attempt to create NULL node + SordNode* nil_node = sord_node_from_serd_node( + world, NULL, &SERD_NODE_NULL, NULL, NULL); + if (nil_node) { + return test_fail("Successfully created NULL node\n"); + } + sord_node_free(world, nil_node); + + // Check node flags are set properly + SordNode* with_newline = sord_new_literal(world, NULL, USTR("a\nb"), NULL); + if (!(sord_node_get_flags(with_newline) & SERD_HAS_NEWLINE)) { + return test_fail("Newline flag not set\n"); + } + SordNode* with_quote = sord_new_literal(world, NULL, USTR("a\"b"), NULL); + if (!(sord_node_get_flags(with_quote) & SERD_HAS_QUOTE)) { + return test_fail("Quote flag not set\n"); + } + + // Create with minimal indexing + SordModel* sord = sord_new(world, SORD_SPO, false); + generate(world, sord, n_quads, NULL); + + if (test_read(world, sord, NULL, n_quads)) { + sord_free(sord); + sord_world_free(world); + return EXIT_FAILURE; + } + + // Check adding tuples with NULL fields fails + sord_world_set_error_sink(world, expected_error, NULL); + const size_t initial_num_quads = sord_num_quads(sord); + SordQuad tup = { 0, 0, 0, 0}; + if (sord_add(sord, tup)) { + return test_fail("Added NULL tuple\n"); + } + tup[0] = uri(world, 1); + if (sord_add(sord, tup)) { + return test_fail("Added tuple with NULL P and O\n"); + } + tup[1] = uri(world, 2); + if (sord_add(sord, tup)) { + return test_fail("Added tuple with NULL O\n"); + } + + if (sord_num_quads(sord) != initial_num_quads) { + return test_fail("Num quads %zu != %zu\n", + sord_num_quads(sord), initial_num_quads); + } + + // Check adding tuples with an active iterator fails + SordIter* iter = sord_begin(sord); + tup[2] = uri(world, 3); + if (sord_add(sord, tup)) { + return test_fail("Added tuple with active iterator\n"); + } + + // Check removing tuples with several active iterator fails + SordIter* iter2 = sord_begin(sord); + if (!sord_erase(sord, iter)) { + return test_fail("Erased tuple with several active iterators\n"); + } + n_expected_errors = 0; + sord_remove(sord, tup); + if (n_expected_errors != 1) { + return test_fail("Removed tuple with several active iterators\n"); + } + sord_iter_free(iter); + sord_iter_free(iter2); + + sord_world_set_error_sink(world, unexpected_error, NULL); + + // Check interning merges equivalent values + SordNode* uri_id = sord_new_uri(world, USTR("http://example.org")); + SordNode* blank_id = sord_new_blank(world, USTR("testblank")); + SordNode* lit_id = sord_new_literal(world, uri_id, USTR("hello"), NULL); + if (sord_node_get_type(uri_id) != SORD_URI) { + return test_fail("URI node has incorrect type\n"); + } else if (sord_node_get_type(blank_id) != SORD_BLANK) { + return test_fail("Blank node has incorrect type\n"); + } else if (sord_node_get_type(lit_id) != SORD_LITERAL) { + return test_fail("Literal node has incorrect type\n"); + } + + const size_t initial_num_nodes = sord_num_nodes(world); + + SordNode* uri_id2 = sord_new_uri(world, USTR("http://example.org")); + SordNode* blank_id2 = sord_new_blank(world, USTR("testblank")); + SordNode* lit_id2 = sord_new_literal(world, uri_id, USTR("hello"), NULL); + if (uri_id2 != uri_id || !sord_node_equals(uri_id2, uri_id)) { + fprintf(stderr, "Fail: URI interning failed (duplicates)\n"); + return finished(world, sord, EXIT_FAILURE); + } else if (blank_id2 != blank_id + || !sord_node_equals(blank_id2, blank_id)) { + fprintf(stderr, "Fail: Blank node interning failed (duplicates)\n"); + return finished(world, sord, EXIT_FAILURE); + } else if (lit_id2 != lit_id || !sord_node_equals(lit_id2, lit_id)) { + fprintf(stderr, "Fail: Literal interning failed (duplicates)\n"); + return finished(world, sord, EXIT_FAILURE); + } + + if (sord_num_nodes(world) != initial_num_nodes) { + return test_fail("Num nodes %zu != %zu\n", + sord_num_nodes(world), initial_num_nodes); + } + + const uint8_t ni_hao[] = { 0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD, 0 }; + SordNode* chello = sord_new_literal(world, NULL, ni_hao, "cmn"); + + // Test literal length + size_t n_bytes; + size_t n_chars; + const uint8_t* str = sord_node_get_string_counted(lit_id2, &n_bytes); + if (strcmp((const char*)str, "hello")) { + return test_fail("Literal node corrupt\n"); + } else if (n_bytes != strlen("hello")) { + return test_fail("ASCII literal byte count incorrect\n"); + } + + str = sord_node_get_string_measured(lit_id2, &n_bytes, &n_chars); + if (n_bytes != strlen("hello") || n_chars != strlen("hello")) { + return test_fail("ASCII literal measured length incorrect\n"); + } else if (strcmp((const char*)str, "hello")) { + return test_fail("ASCII literal string incorrect\n"); + } + + str = sord_node_get_string_measured(chello, &n_bytes, &n_chars); + if (n_bytes != 6) { + return test_fail("Multi-byte literal byte count incorrect\n"); + } else if (n_chars != 2) { + return test_fail("Multi-byte literal character count incorrect\n"); + } else if (strcmp((const char*)str, (const char*)ni_hao)) { + return test_fail("Multi-byte literal string incorrect\n"); + } + + // Check interning doesn't clash non-equivalent values + SordNode* uri_id3 = sord_new_uri(world, USTR("http://example.orgX")); + SordNode* blank_id3 = sord_new_blank(world, USTR("testblankX")); + SordNode* lit_id3 = sord_new_literal(world, uri_id, USTR("helloX"), NULL); + if (uri_id3 == uri_id || sord_node_equals(uri_id3, uri_id)) { + fprintf(stderr, "Fail: URI interning failed (clash)\n"); + return finished(world, sord, EXIT_FAILURE); + } else if (blank_id3 == blank_id || sord_node_equals(blank_id3, blank_id)) { + fprintf(stderr, "Fail: Blank node interning failed (clash)\n"); + return finished(world, sord, EXIT_FAILURE); + } else if (lit_id3 == lit_id || sord_node_equals(lit_id3, lit_id)) { + fprintf(stderr, "Fail: Literal interning failed (clash)\n"); + return finished(world, sord, EXIT_FAILURE); + } + + // Check literal interning + SordNode* lit4 = sord_new_literal(world, NULL, USTR("hello"), NULL); + SordNode* lit5 = sord_new_literal(world, uri_id2, USTR("hello"), NULL); + SordNode* lit6 = sord_new_literal(world, NULL, USTR("hello"), "en-ca"); + if (lit4 == lit5 || sord_node_equals(lit4, lit5) + || lit4 == lit6 || sord_node_equals(lit4, lit6) + || lit5 == lit6 || sord_node_equals(lit5, lit6)) { + fprintf(stderr, "Fail: Literal interning failed (type/lang clash)\n"); + return finished(world, sord, EXIT_FAILURE); + } + + // Check relative URI construction + SordNode* reluri = sord_new_relative_uri( + world, USTR("a/b"), USTR("http://example.org/")); + if (strcmp((const char*)sord_node_get_string(reluri), + "http://example.org/a/b")) { + fprintf(stderr, "Fail: Bad relative URI constructed: <%s>\n", + sord_node_get_string(reluri)); + return finished(world, sord, EXIT_FAILURE); + } + SordNode* reluri2 = sord_new_relative_uri( + world, USTR("http://drobilla.net/"), USTR("http://example.org/")); + if (strcmp((const char*)sord_node_get_string(reluri2), + "http://drobilla.net/")) { + fprintf(stderr, "Fail: Bad relative URI constructed: <%s>\n", + sord_node_get_string(reluri)); + return finished(world, sord, EXIT_FAILURE); + } + + // Check comparison with NULL + sord_node_free(world, uri_id); + sord_node_free(world, blank_id); + sord_node_free(world, lit_id); + sord_node_free(world, uri_id2); + sord_node_free(world, blank_id2); + sord_node_free(world, lit_id2); + sord_node_free(world, uri_id3); + sord_node_free(world, blank_id3); + sord_node_free(world, lit_id3); + sord_free(sord); + + static const char* const index_names[6] = { + "spo", "sop", "ops", "osp", "pso", "pos" + }; + + for (int i = 0; i < 6; ++i) { + sord = sord_new(world, (1 << i), false); + printf("Testing Index `%s'\n", index_names[i]); + generate(world, sord, n_quads, 0); + if (test_read(world, sord, 0, n_quads)) { + return finished(world, sord, EXIT_FAILURE); + } + sord_free(sord); + } + + static const char* const graph_index_names[6] = { + "gspo", "gsop", "gops", "gosp", "gpso", "gpos" + }; + + for (int i = 0; i < 6; ++i) { + sord = sord_new(world, (1 << i), true); + printf("Testing Index `%s'\n", graph_index_names[i]); + SordNode* graph = uri(world, 42); + generate(world, sord, n_quads, graph); + if (test_read(world, sord, graph, n_quads)) { + return finished(world, sord, EXIT_FAILURE); + } + sord_free(sord); + } + + // Test removing + sord = sord_new(world, SORD_SPO, true); + tup[0] = uri(world, 1); + tup[1] = uri(world, 2); + tup[2] = sord_new_literal(world, 0, USTR("hello"), NULL); + tup[3] = 0; + sord_add(sord, tup); + if (!sord_ask(sord, tup[0], tup[1], tup[2], tup[3])) { + fprintf(stderr, "Failed to add tuple\n"); + return finished(world, sord, EXIT_FAILURE); + } + sord_node_free(world, (SordNode*)tup[2]); + tup[2] = sord_new_literal(world, 0, USTR("hi"), NULL); + sord_add(sord, tup); + sord_remove(sord, tup); + if (sord_num_quads(sord) != 1) { + fprintf(stderr, "Remove failed (%zu quads, expected 1)\n", + sord_num_quads(sord)); + return finished(world, sord, EXIT_FAILURE); + } + + iter = sord_find(sord, tup); + if (!sord_iter_end(iter)) { + fprintf(stderr, "Found removed tuple\n"); + return finished(world, sord, EXIT_FAILURE); + } + sord_iter_free(iter); + + // Test double remove (silent success) + sord_remove(sord, tup); + + // Load a couple graphs + SordNode* graph42 = uri(world, 42); + SordNode* graph43 = uri(world, 43); + generate(world, sord, 1, graph42); + generate(world, sord, 1, graph43); + + // Remove one graph via iterator + SerdStatus st; + iter = sord_search(sord, NULL, NULL, NULL, graph43); + while (!sord_iter_end(iter)) { + if ((st = sord_erase(sord, iter))) { + fprintf(stderr, "Remove by iterator failed (%s)\n", + serd_strerror(st)); + return finished(world, sord, EXIT_FAILURE); + } + } + sord_iter_free(iter); + + // Erase the first tuple (an element in the default graph) + iter = sord_begin(sord); + if (sord_erase(sord, iter)) { + return test_fail("Failed to erase begin iterator on non-empty model\n"); + } + sord_iter_free(iter); + + // Ensure only the other graph is left + SordQuad quad; + SordQuad pat = { 0, 0, 0, graph42 }; + for (iter = sord_begin(sord); !sord_iter_end(iter); sord_iter_next(iter)) { + sord_iter_get(iter, quad); + if (!sord_quad_match(quad, pat)) { + fprintf(stderr, "Graph removal via iteration failed\n"); + return finished(world, sord, EXIT_FAILURE); + } + } + sord_iter_free(iter); + + // Load file into two separate graphs + sord_free(sord); + sord = sord_new(world, SORD_SPO, true); + env = serd_env_new(&base); + SordNode* graph1 = sord_new_uri(world, USTR("http://example.org/graph1")); + SordNode* graph2 = sord_new_uri(world, USTR("http://example.org/graph2")); + SerdReader* reader = sord_new_reader(sord, env, SERD_TURTLE, graph1); + if ((st = serd_reader_read_string(reader, USTR("<s> <p> <o> .")))) { + fprintf(stderr, "Failed to read string (%s)\n", serd_strerror(st)); + return finished(world, sord, EXIT_FAILURE); + } + serd_reader_free(reader); + reader = sord_new_reader(sord, env, SERD_TURTLE, graph2); + if ((st = serd_reader_read_string(reader, USTR("<s> <p> <o> .")))) { + fprintf(stderr, "Failed to re-read string (%s)\n", serd_strerror(st)); + return finished(world, sord, EXIT_FAILURE); + } + serd_reader_free(reader); + serd_env_free(env); + + // Ensure we only see triple once + size_t n_triples = 0; + for (iter = sord_begin(sord); !sord_iter_end(iter); sord_iter_next(iter)) { + fprintf(stderr, "%s %s %s %s\n", + sord_node_get_string(sord_iter_get_node(iter, SORD_SUBJECT)), + sord_node_get_string(sord_iter_get_node(iter, SORD_PREDICATE)), + sord_node_get_string(sord_iter_get_node(iter, SORD_OBJECT)), + sord_node_get_string(sord_iter_get_node(iter, SORD_GRAPH))); + + ++n_triples; + } + sord_iter_free(iter); + if (n_triples != 1) { + fprintf(stderr, "Found duplicate triple\n"); + return finished(world, sord, EXIT_FAILURE); + } + + // Test SPO iteration on an SOP indexed store + sord_free(sord); + sord = sord_new(world, SORD_SOP, false); + generate(world, sord, 1, graph42); + for (iter = sord_begin(sord); !sord_iter_end(iter); sord_iter_next(iter)) { + ++n_triples; + } + sord_iter_free(iter); + + return finished(world, sord, EXIT_SUCCESS); +} diff --git a/src/sord_validate.c b/src/sord_validate.c new file mode 100644 index 0000000..a5c8d45 --- /dev/null +++ b/src/sord_validate.c @@ -0,0 +1,790 @@ +/* + Copyright 2012-2017 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#define _BSD_SOURCE 1 // for realpath +#define _DEFAULT_SOURCE 1 // for realpath + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _WIN32 +# include <windows.h> +#endif + +#include "serd/serd.h" +#include "sord/sord.h" +#include "sord_config.h" + +#ifdef HAVE_PCRE +# include <pcre.h> +#endif + +#define USTR(s) ((const uint8_t*)(s)) + +#define NS_foaf (const uint8_t*)"http://xmlns.com/foaf/0.1/" +#define NS_owl (const uint8_t*)"http://www.w3.org/2002/07/owl#" +#define NS_rdf (const uint8_t*)"http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define NS_rdfs (const uint8_t*)"http://www.w3.org/2000/01/rdf-schema#" +#define NS_xsd (const uint8_t*)"http://www.w3.org/2001/XMLSchema#" + +typedef struct { + SordNode* foaf_Document; + SordNode* owl_AnnotationProperty; + SordNode* owl_Class; + SordNode* owl_DatatypeProperty; + SordNode* owl_FunctionalProperty; + SordNode* owl_InverseFunctionalProperty; + SordNode* owl_ObjectProperty; + SordNode* owl_OntologyProperty; + SordNode* owl_Restriction; + SordNode* owl_Thing; + SordNode* owl_cardinality; + SordNode* owl_equivalentClass; + SordNode* owl_maxCardinality; + SordNode* owl_minCardinality; + SordNode* owl_onDatatype; + SordNode* owl_onProperty; + SordNode* owl_someValuesFrom; + SordNode* owl_withRestrictions; + SordNode* rdf_PlainLiteral; + SordNode* rdf_Property; + SordNode* rdf_first; + SordNode* rdf_rest; + SordNode* rdf_type; + SordNode* rdfs_Class; + SordNode* rdfs_Literal; + SordNode* rdfs_Resource; + SordNode* rdfs_domain; + SordNode* rdfs_label; + SordNode* rdfs_range; + SordNode* rdfs_subClassOf; + SordNode* xsd_anyURI; + SordNode* xsd_decimal; + SordNode* xsd_double; + SordNode* xsd_maxInclusive; + SordNode* xsd_minInclusive; + SordNode* xsd_pattern; + SordNode* xsd_string; +} URIs; + +int n_errors = 0; +int n_restrictions = 0; +bool one_line_errors = false; + +static int +print_version(void) +{ + printf("sord_validate " SORD_VERSION + " <http://drobilla.net/software/sord>\n"); + printf("Copyright 2012-2017 David Robillard <http://drobilla.net>.\n" + "License: <http://www.opensource.org/licenses/isc>\n" + "This is free software; you are free to change and redistribute it." + "\nThere is NO WARRANTY, to the extent permitted by law.\n"); + return 0; +} + +static int +print_usage(const char* name, bool error) +{ + FILE* const os = error ? stderr : stdout; + fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); + fprintf(os, "Validate RDF data\n\n"); + fprintf(os, " -h Display this help and exit\n"); + fprintf(os, " -l Print errors on a single line.\n"); + fprintf(os, " -v Display version information and exit\n"); + fprintf(os, + "Validate RDF data. This is a simple validator which checks\n" + "that all used properties are actually defined. It does not do\n" + "any fancy file retrieval, the files passed on the command line\n" + "are the only data that is read. In other words, you must pass\n" + "the definition of all vocabularies used on the command line.\n"); + return error ? 1 : 0; +} + +static uint8_t* +absolute_path(const uint8_t* path) +{ +#ifdef _WIN32 + char* out = (char*)malloc(MAX_PATH); + GetFullPathName((const char*)path, MAX_PATH, out, NULL); + return (uint8_t*)out; +#else + return (uint8_t*)realpath((const char*)path, NULL); +#endif +} + +static int +errorf(const SordQuad quad, const char* fmt, ...) +{ + va_list args; + va_start(args, fmt); + fprintf(stderr, "error: "); + vfprintf(stderr, fmt, args); + va_end(args); + + const char* sep = one_line_errors ? "\t" : "\n "; + fprintf(stderr, "%s%s%s%s%s%s\n", + sep, (const char*)sord_node_get_string(quad[SORD_SUBJECT]), + sep, (const char*)sord_node_get_string(quad[SORD_PREDICATE]), + sep, (const char*)sord_node_get_string(quad[SORD_OBJECT])); + + ++n_errors; + return 1; +} + +static bool +is_descendant_of(SordModel* model, + const URIs* uris, + const SordNode* child, + const SordNode* parent, + const SordNode* pred) +{ + if (!child) { + return false; + } else if ( + sord_node_equals(child, parent) || + sord_ask(model, child, uris->owl_equivalentClass, parent, NULL)) { + return true; + } + + SordIter* i = sord_search(model, child, pred, NULL, NULL); + for (; !sord_iter_end(i); sord_iter_next(i)) { + const SordNode* o = sord_iter_get_node(i, SORD_OBJECT); + if (sord_node_equals(child, o)) { + continue; // Weird class is explicitly a descendent of itself + } + if (is_descendant_of(model, uris, o, parent, pred)) { + sord_iter_free(i); + return true; + } + } + sord_iter_free(i); + + return false; +} + +static bool +regexp_match(const uint8_t* pat, const char* str) +{ +#ifdef HAVE_PCRE + // Append a $ to the pattern so we only match if the entire string matches + const size_t len = strlen((const char*)pat); + char* const regx = (char*)malloc(len + 2); + memcpy(regx, pat, len); + regx[len] = '$'; + regx[len + 1] = '\0'; + + const char* err; + int erroffset; + pcre* re = pcre_compile(regx, PCRE_ANCHORED, &err, &erroffset, NULL); + free(regx); + if (!re) { + fprintf(stderr, "Error in pattern `%s' at offset %d (%s)\n", + pat, erroffset, err); + return false; + } + + const bool ret = pcre_exec(re, NULL, str, strlen(str), 0, 0, NULL, 0) >= 0; + pcre_free(re); + return ret; +#endif // HAVE_PCRE + return true; +} + +static int +bound_cmp(SordModel* model, + const URIs* uris, + const SordNode* literal, + const SordNode* type, + const SordNode* bound) +{ + const char* str = (const char*)sord_node_get_string(literal); + const char* bound_str = (const char*)sord_node_get_string(bound); + const SordNode* pred = uris->owl_onDatatype; + const bool is_numeric = + is_descendant_of(model, uris, type, uris->xsd_decimal, pred) || + is_descendant_of(model, uris, type, uris->xsd_double, pred); + + if (is_numeric) { + const double fbound = serd_strtod(bound_str, NULL); + const double fliteral = serd_strtod(str, NULL); + return ((fliteral < fbound) ? -1 : + (fliteral > fbound) ? 1 : + 0); + } else { + return strcmp(str, bound_str); + } +} + +static bool +check_restriction(SordModel* model, + const URIs* uris, + const SordNode* literal, + const SordNode* type, + const SordNode* restriction) +{ + size_t len = 0; + const char* str = (const char*)sord_node_get_string_counted(literal, &len); + + // Check xsd:pattern + SordIter* p = sord_search(model, restriction, uris->xsd_pattern, 0, 0); + if (p) { + const SordNode* pat = sord_iter_get_node(p, SORD_OBJECT); + if (!regexp_match(sord_node_get_string(pat), str)) { + fprintf(stderr, "`%s' does not match <%s> pattern `%s'\n", + sord_node_get_string(literal), + sord_node_get_string(type), + sord_node_get_string(pat)); + sord_iter_free(p); + return false; + } + sord_iter_free(p); + ++n_restrictions; + } + + // Check xsd:minInclusive + SordIter* l = sord_search(model, restriction, uris->xsd_minInclusive, 0, 0); + if (l) { + const SordNode* lower = sord_iter_get_node(l, SORD_OBJECT); + if (bound_cmp(model, uris, literal, type, lower) < 0) { + fprintf(stderr, "`%s' is not >= <%s> minimum `%s'\n", + sord_node_get_string(literal), + sord_node_get_string(type), + sord_node_get_string(lower)); + sord_iter_free(l); + return false; + } + sord_iter_free(l); + ++n_restrictions; + } + + // Check xsd:maxInclusive + SordIter* u = sord_search(model, restriction, uris->xsd_maxInclusive, 0, 0); + if (u) { + const SordNode* upper = sord_iter_get_node(u, SORD_OBJECT); + if (bound_cmp(model, uris, literal, type, upper) > 0) { + fprintf(stderr, "`%s' is not <= <%s> maximum `%s'\n", + sord_node_get_string(literal), + sord_node_get_string(type), + sord_node_get_string(upper)); + sord_iter_free(u); + return false; + } + sord_iter_free(u); + ++n_restrictions; + } + + return true; // Unknown restriction, be quietly tolerant +} + +static bool +literal_is_valid(SordModel* model, + const URIs* uris, + const SordQuad quad, + const SordNode* literal, + const SordNode* type) +{ + if (!type) { + return true; + } + + /* Check that literal data is related to required type. We don't do a + strict subtype check here because e.g. an xsd:decimal might be a valid + xsd:unsignedInt, which the pattern checks will verify, but if the + literal type is not related to the required type at all + (e.g. xsd:decimal and xsd:string) there is a problem. */ + const SordNode* datatype = sord_node_get_datatype(literal); + if (datatype && datatype != type) { + if (!is_descendant_of( + model, uris, + datatype, type, uris->owl_onDatatype) && + !is_descendant_of( + model, uris, + type, datatype, uris->owl_onDatatype) && + !(sord_node_equals(datatype, uris->xsd_decimal) && + is_descendant_of( + model, uris, + type, uris->xsd_double, uris->owl_onDatatype))) { + errorf(quad, + "Literal `%s' datatype <%s> is not compatible with <%s>\n", + sord_node_get_string(literal), + sord_node_get_string(datatype), + sord_node_get_string(type)); + return false; + } + } + + // Find restrictions list + SordIter* rs = sord_search(model, type, uris->owl_withRestrictions, 0, 0); + if (sord_iter_end(rs)) { + return true; // No restrictions + } + + // Walk list, checking each restriction + const SordNode* head = sord_iter_get_node(rs, SORD_OBJECT); + while (head) { + SordIter* f = sord_search(model, head, uris->rdf_first, 0, 0); + if (!f) { + break; // Reached end of restrictions list without failure + } + + // Check this restriction + const bool good = check_restriction( + model, uris, literal, type, sord_iter_get_node(f, SORD_OBJECT)); + sord_iter_free(f); + + if (!good) { + sord_iter_free(rs); + return false; // Failed, literal is invalid + } + + // Seek to next list node + SordIter* n = sord_search(model, head, uris->rdf_rest, 0, 0); + head = n ? sord_iter_get_node(n, SORD_OBJECT) : NULL; + sord_iter_free(n); + } + + sord_iter_free(rs); + + SordIter* s = sord_search(model, type, uris->owl_onDatatype, 0, 0); + if (s) { + const SordNode* super = sord_iter_get_node(s, SORD_OBJECT); + const bool good = literal_is_valid( + model, uris, quad, literal, super); + sord_iter_free(s); + return good; // Match iff literal also matches supertype + } + + return true; // Matches top level type +} + +static bool +check_type(SordModel* model, + const URIs* uris, + const SordQuad quad, + const SordNode* node, + const SordNode* type) +{ + if (sord_node_equals(type, uris->rdfs_Resource) || + sord_node_equals(type, uris->owl_Thing)) { + return true; + } + + if (sord_node_get_type(node) == SORD_LITERAL) { + if (sord_node_equals(type, uris->rdfs_Literal)) { + return true; + } else if (sord_node_equals(type, uris->rdf_PlainLiteral)) { + return !sord_node_get_language(node); + } else { + return literal_is_valid(model, uris, quad, node, type); + } + } else if (sord_node_get_type(node) == SORD_URI) { + if (sord_node_equals(type, uris->foaf_Document)) { + return true; // Questionable... + } else if (is_descendant_of( + model, uris, + type, uris->xsd_anyURI, uris->owl_onDatatype)) { + /* Type is any URI and this is a URI, so pass. Restrictions on + anyURI subtypes are not currently checked (very uncommon). */ + return true; // Type is anyURI, and this is a URI + } else { + SordIter* t = sord_search(model, node, uris->rdf_type, NULL, NULL); + for (; !sord_iter_end(t); sord_iter_next(t)) { + if (is_descendant_of(model, uris, + sord_iter_get_node(t, SORD_OBJECT), + type, + uris->rdfs_subClassOf)) { + sord_iter_free(t); + return true; + } + } + sord_iter_free(t); + return false; + } + } else { + return true; // Blanks often lack explicit types, ignore + } + + return false; +} + +static uint64_t +count_non_blanks(SordIter* i, SordQuadIndex field) +{ + uint64_t n = 0; + for (; !sord_iter_end(i); sord_iter_next(i)) { + const SordNode* node = sord_iter_get_node(i, field); + if (sord_node_get_type(node) != SORD_BLANK) { + ++n; + } + } + return n; +} + +static int +check_properties(SordModel* model, URIs* uris) +{ + int st = 0; + SordIter* i = sord_begin(model); + for (; !sord_iter_end(i); sord_iter_next(i)) { + SordQuad quad; + sord_iter_get(i, quad); + + const SordNode* subj = quad[SORD_SUBJECT]; + const SordNode* pred = quad[SORD_PREDICATE]; + const SordNode* obj = quad[SORD_OBJECT]; + + bool is_any_property = false; + SordIter* t = sord_search(model, pred, uris->rdf_type, NULL, NULL); + for (; !sord_iter_end(t); sord_iter_next(t)) { + if (is_descendant_of(model, uris, + sord_iter_get_node(t, SORD_OBJECT), + uris->rdf_Property, + uris->rdfs_subClassOf)) { + is_any_property = true; + break; + } + } + sord_iter_free(t); + + const bool is_ObjectProperty = sord_ask( + model, pred, uris->rdf_type, uris->owl_ObjectProperty, 0); + const bool is_FunctionalProperty = sord_ask( + model, pred, uris->rdf_type, uris->owl_FunctionalProperty, 0); + const bool is_InverseFunctionalProperty = sord_ask( + model, pred, uris->rdf_type, uris->owl_InverseFunctionalProperty, 0); + const bool is_DatatypeProperty = sord_ask( + model, pred, uris->rdf_type, uris->owl_DatatypeProperty, 0); + + if (!is_any_property) { + st = errorf(quad, "Use of undefined property"); + } + + if (!sord_ask(model, pred, uris->rdfs_label, NULL, NULL)) { + st = errorf(quad, "Property <%s> has no label", + sord_node_get_string(pred)); + } + + if (is_DatatypeProperty && + sord_node_get_type(obj) != SORD_LITERAL) { + st = errorf(quad, "Datatype property with non-literal value"); + } + + if (is_ObjectProperty && + sord_node_get_type(obj) == SORD_LITERAL) { + st = errorf(quad, "Object property with literal value"); + } + + if (is_FunctionalProperty) { + SordIter* o = sord_search(model, subj, pred, NULL, NULL); + const uint64_t n = count_non_blanks(o, SORD_OBJECT); + if (n > 1) { + st = errorf(quad, "Functional property with %u objects", n); + } + sord_iter_free(o); + } + + if (is_InverseFunctionalProperty) { + SordIter* s = sord_search(model, NULL, pred, obj, NULL); + const unsigned n = count_non_blanks(s, SORD_SUBJECT); + if (n > 1) { + st = errorf( + quad, "Inverse functional property with %u subjects", n); + } + sord_iter_free(s); + } + + if (sord_node_equals(pred, uris->rdf_type) && + !sord_ask(model, obj, uris->rdf_type, uris->rdfs_Class, NULL) && + !sord_ask(model, obj, uris->rdf_type, uris->owl_Class, NULL)) { + st = errorf(quad, "Type is not a rdfs:Class or owl:Class"); + } + + if (sord_node_get_type(obj) == SORD_LITERAL && + !literal_is_valid(model, uris, quad, + obj, sord_node_get_datatype(obj))) { + st = errorf(quad, "Literal does not match datatype"); + } + + SordIter* r = sord_search(model, pred, uris->rdfs_range, NULL, NULL); + for (; !sord_iter_end(r); sord_iter_next(r)) { + const SordNode* range = sord_iter_get_node(r, SORD_OBJECT); + if (!check_type(model, uris, quad, obj, range)) { + st = errorf(quad, "Object not in range <%s>\n", + sord_node_get_string(range)); + } + } + sord_iter_free(r); + + SordIter* d = sord_search(model, pred, uris->rdfs_domain, NULL, NULL); + if (d) { + const SordNode* domain = sord_iter_get_node(d, SORD_OBJECT); + if (!check_type(model, uris, quad, subj, domain)) { + st = errorf(quad, "Subject not in domain <%s>", + sord_node_get_string(domain)); + } + sord_iter_free(d); + } + } + sord_iter_free(i); + + return st; +} + +static int +check_instance(SordModel* model, + const URIs* uris, + const SordNode* restriction, + const SordQuad quad) +{ + const SordNode* instance = quad[SORD_SUBJECT]; + int st = 0; + + const SordNode* prop = sord_get( + model, restriction, uris->owl_onProperty, NULL, NULL); + if (!prop) { + return 0; + } + + const unsigned values = sord_count(model, instance, prop, NULL, NULL); + + // Check exact cardinality + const SordNode* card = sord_get( + model, restriction, uris->owl_cardinality, NULL, NULL); + if (card) { + const unsigned c = atoi((const char*)sord_node_get_string(card)); + if (values != c) { + st = errorf(quad, "Property %s on %s has %u != %u values", + sord_node_get_string(prop), + sord_node_get_string(instance), + values, c); + } + } + + // Check minimum cardinality + const SordNode* minCard = sord_get( + model, restriction, uris->owl_minCardinality, NULL, NULL); + if (minCard) { + const unsigned m = atoi((const char*)sord_node_get_string(minCard)); + if (values < m) { + st = errorf(quad, "Property %s on %s has %u < %u values", + sord_node_get_string(prop), + sord_node_get_string(instance), + values, m); + } + } + + // Check maximum cardinality + const SordNode* maxCard = sord_get( + model, restriction, uris->owl_maxCardinality, NULL, NULL); + if (maxCard) { + const unsigned m = atoi((const char*)sord_node_get_string(maxCard)); + if (values < m) { + st = errorf(quad, "Property %s on %s has %u > %u values", + sord_node_get_string(prop), + sord_node_get_string(instance), + values, m); + } + } + + // Check someValuesFrom + SordIter* sf = sord_search( + model, restriction, uris->owl_someValuesFrom, NULL, NULL); + if (sf) { + const SordNode* type = sord_iter_get_node(sf, SORD_OBJECT); + + SordIter* v = sord_search(model, instance, prop, NULL, NULL); + bool found = false; + for (; !sord_iter_end(v); sord_iter_next(v)) { + const SordNode* value = sord_iter_get_node(v, SORD_OBJECT); + if (check_type(model, uris, quad, value, type)) { + found = true; + break; + } + } + if (!found) { + st = errorf(quad, "%s has no <%s> values of type <%s>\n", + sord_node_get_string(instance), + sord_node_get_string(prop), + sord_node_get_string(type)); + } + sord_iter_free(v); + } + sord_iter_free(sf); + + return st; +} + +static int +check_class_instances(SordModel* model, + const URIs* uris, + const SordNode* restriction, + const SordNode* klass) +{ + // Check immediate instances of this class + SordIter* i = sord_search(model, NULL, uris->rdf_type, klass, NULL); + for (; !sord_iter_end(i); sord_iter_next(i)) { + SordQuad quad; + sord_iter_get(i, quad); + check_instance(model, uris, restriction, quad); + } + sord_iter_free(i); + + // Check instances of all subclasses recursively + SordIter* s = sord_search(model, NULL, uris->rdfs_subClassOf, klass, NULL); + for (; !sord_iter_end(s); sord_iter_next(s)) { + const SordNode* subklass = sord_iter_get_node(s, SORD_SUBJECT); + check_class_instances(model, uris, restriction, subklass); + } + sord_iter_free(s); + + return 0; +} + +static int +check_instances(SordModel* model, const URIs* uris) +{ + int st = 0; + SordIter* r = sord_search( + model, NULL, uris->rdf_type, uris->owl_Restriction, NULL); + for (; !sord_iter_end(r); sord_iter_next(r)) { + const SordNode* restriction = sord_iter_get_node(r, SORD_SUBJECT); + const SordNode* prop = sord_get( + model, restriction, uris->owl_onProperty, NULL, NULL); + if (!prop) { + continue; + } + + SordIter* c = sord_search( + model, NULL, uris->rdfs_subClassOf, restriction, NULL); + for (; !sord_iter_end(c); sord_iter_next(c)) { + const SordNode* klass = sord_iter_get_node(c, SORD_SUBJECT); + check_class_instances(model, uris, restriction, klass); + } + sord_iter_free(c); + } + sord_iter_free(r); + + return st; +} + +int +main(int argc, char** argv) +{ + if (argc < 2) { + return print_usage(argv[0], true); + } + + int a = 1; + for (; a < argc && argv[a][0] == '-'; ++a) { + if (argv[a][1] == 'l') { + one_line_errors = true; + } else if (argv[a][1] == 'v') { + return print_version(); + } else { + fprintf(stderr, "%s: Unknown option `%s'\n", argv[0], argv[a]); + return print_usage(argv[0], true); + } + } + + SordWorld* world = sord_world_new(); + SordModel* model = sord_new(world, SORD_SPO|SORD_OPS, false); + SerdEnv* env = serd_env_new(&SERD_NODE_NULL); + SerdReader* reader = sord_new_reader(model, env, SERD_TURTLE, NULL); + + for (; a < argc; ++a) { + const uint8_t* input = (const uint8_t*)argv[a]; + uint8_t* in_path = absolute_path(serd_uri_to_path(input)); + + if (!in_path) { + fprintf(stderr, "Skipping file %s\n", input); + continue; + } + + SerdURI base_uri; + SerdNode base_uri_node = serd_node_new_file_uri( + in_path, NULL, &base_uri, true); + + serd_env_set_base_uri(env, &base_uri_node); + const SerdStatus st = serd_reader_read_file(reader, in_path); + if (st) { + fprintf(stderr, "error reading %s: %s\n", + in_path, serd_strerror(st)); + } + + serd_node_free(&base_uri_node); + free(in_path); + } + serd_reader_free(reader); + serd_env_free(env); + +#define URI(prefix, suffix) \ + uris.prefix##_##suffix = sord_new_uri(world, NS_##prefix #suffix) + + URIs uris; + URI(foaf, Document); + URI(owl, AnnotationProperty); + URI(owl, Class); + URI(owl, DatatypeProperty); + URI(owl, FunctionalProperty); + URI(owl, InverseFunctionalProperty); + URI(owl, ObjectProperty); + URI(owl, OntologyProperty); + URI(owl, Restriction); + URI(owl, Thing); + URI(owl, cardinality); + URI(owl, equivalentClass); + URI(owl, maxCardinality); + URI(owl, minCardinality); + URI(owl, onDatatype); + URI(owl, onProperty); + URI(owl, someValuesFrom); + URI(owl, withRestrictions); + URI(rdf, PlainLiteral); + URI(rdf, Property); + URI(rdf, first); + URI(rdf, rest); + URI(rdf, type); + URI(rdfs, Class); + URI(rdfs, Literal); + URI(rdfs, Resource); + URI(rdfs, domain); + URI(rdfs, label); + URI(rdfs, range); + URI(rdfs, subClassOf); + URI(xsd, anyURI); + URI(xsd, decimal); + URI(xsd, double); + URI(xsd, maxInclusive); + URI(xsd, minInclusive); + URI(xsd, pattern); + URI(xsd, string); + +#ifndef HAVE_PCRE + fprintf(stderr, "warning: Built without PCRE, datatypes not checked.\n"); +#endif + + const int prop_st = check_properties(model, &uris); + const int inst_st = check_instances(model, &uris); + + printf("Found %d errors among %d files (checked %d restrictions)\n", + n_errors, argc - 1, n_restrictions); + + sord_free(model); + sord_world_free(world); + return prop_st || inst_st; +} diff --git a/src/sordi.c b/src/sordi.c new file mode 100644 index 0000000..46e6273 --- /dev/null +++ b/src/sordi.c @@ -0,0 +1,209 @@ +/* + Copyright 2011-2016 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _WIN32 +# include <windows.h> +#endif + +#include "serd/serd.h" +#include "sord/sord.h" +#include "sord_config.h" + +#define SORDI_ERROR(msg) fprintf(stderr, "sordi: " msg); +#define SORDI_ERRORF(fmt, ...) fprintf(stderr, "sordi: " fmt, __VA_ARGS__); + +typedef struct { + SerdWriter* writer; + SerdEnv* env; + SerdNode base_uri_node; + SerdURI base_uri; + SordModel* sord; +} State; + +static int +print_version(void) +{ + printf("sordi " SORD_VERSION " <http://drobilla.net/software/sord>\n"); + printf("Copyright 2011-2016 David Robillard <http://drobilla.net>.\n" + "License: <http://www.opensource.org/licenses/isc>\n" + "This is free software; you are free to change and redistribute it." + "\nThere is NO WARRANTY, to the extent permitted by law.\n"); + return 0; +} + +static int +print_usage(const char* name, bool error) +{ + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... INPUT [BASE_URI]\n", name); + fprintf(os, "Load and re-serialise RDF data.\n"); + fprintf(os, "Use - for INPUT to read from standard input.\n\n"); + fprintf(os, " -h Display this help and exit\n"); + fprintf(os, " -i SYNTAX Input syntax (`turtle' or `ntriples')\n"); + fprintf(os, " -o SYNTAX Output syntax (`turtle' or `ntriples')\n"); + fprintf(os, " -s INPUT Parse INPUT as string (terminates options)\n"); + fprintf(os, " -v Display version information and exit\n"); + return error ? 1 : 0; +} + +static bool +set_syntax(SerdSyntax* syntax, const char* name) +{ + if (!strcmp(name, "turtle")) { + *syntax = SERD_TURTLE; + } else if (!strcmp(name, "ntriples")) { + *syntax = SERD_NTRIPLES; + } else { + SORDI_ERRORF("unknown syntax `%s'\n", name); + return false; + } + return true; +} + +int +main(int argc, char** argv) +{ + if (argc < 2) { + return print_usage(argv[0], true); + } + + FILE* in_fd = NULL; + SerdSyntax input_syntax = SERD_TURTLE; + SerdSyntax output_syntax = SERD_NTRIPLES; + bool from_file = true; + const uint8_t* in_name = NULL; + int a = 1; + for (; a < argc && argv[a][0] == '-'; ++a) { + if (argv[a][1] == '\0') { + in_name = (const uint8_t*)"(stdin)"; + in_fd = stdin; + break; + } else if (argv[a][1] == 'h') { + return print_usage(argv[0], false); + } else if (argv[a][1] == 'v') { + return print_version(); + } else if (argv[a][1] == 's') { + in_name = (const uint8_t*)"(string)"; + from_file = false; + ++a; + break; + } else if (argv[a][1] == 'i') { + if (++a == argc) { + SORDI_ERROR("option requires an argument -- 'i'\n\n"); + return print_usage(argv[0], true); + } + if (!set_syntax(&input_syntax, argv[a])) { + return print_usage(argv[0], true); + } + } else if (argv[a][1] == 'o') { + if (++a == argc) { + SORDI_ERROR("option requires an argument -- 'o'\n\n"); + return print_usage(argv[0], true); + } + if (!set_syntax(&output_syntax, argv[a])) { + return print_usage(argv[0], true); + } + } else { + SORDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); + return print_usage(argv[0], true); + } + } + + if (a == argc) { + SORDI_ERROR("missing input\n"); + return print_usage(argv[0], true); + } + + const uint8_t* input = (const uint8_t*)argv[a++]; + if (from_file) { + in_name = in_name ? in_name : input; + if (!in_fd) { + input = serd_uri_to_path(in_name); + if (!input || !(in_fd = fopen((const char*)input, "rb"))) { + return 1; + } + } + } + + SerdURI base_uri = SERD_URI_NULL; + SerdNode base = SERD_NODE_NULL; + if (a < argc) { // Base URI given on command line + base = serd_node_new_uri_from_string( + (const uint8_t*)argv[a], NULL, &base_uri); + } else if (from_file && in_fd != stdin) { // Use input file URI + base = serd_node_new_file_uri(input, NULL, &base_uri, true); + } + + SordWorld* world = sord_world_new(); + SordModel* sord = sord_new(world, SORD_SPO|SORD_OPS, false); + SerdEnv* env = serd_env_new(&base); + SerdReader* reader = sord_new_reader(sord, env, input_syntax, NULL); + + SerdStatus status = (from_file) + ? serd_reader_read_file_handle(reader, in_fd, in_name) + : serd_reader_read_string(reader, input); + + serd_reader_free(reader); + + FILE* out_fd = stdout; + SerdEnv* write_env = serd_env_new(&base); + + int output_style = SERD_STYLE_RESOLVED; + if (output_syntax == SERD_NTRIPLES) { + output_style |= SERD_STYLE_ASCII; + } else { + output_style |= SERD_STYLE_CURIED | SERD_STYLE_ABBREVIATED; + } + + SerdWriter* writer = serd_writer_new( + output_syntax, + (SerdStyle)output_style, + write_env, &base_uri, serd_file_sink, stdout); + + // Write @prefix directives + serd_env_foreach(env, + (SerdPrefixSink)serd_writer_set_prefix, + writer); + + // Write statements + sord_write(sord, writer, NULL); + + serd_writer_finish(writer); + serd_writer_free(writer); + + serd_env_free(env); + serd_env_free(write_env); + serd_node_free(&base); + + sord_free(sord); + sord_world_free(world); + + if (from_file) { + fclose(in_fd); + } + + if (fclose(out_fd)) { + perror("sordi: write error"); + status = SERD_ERR_UNKNOWN; + } + + return (status > SERD_FAILURE) ? 1 : 0; +} diff --git a/src/sordmm_test.cpp b/src/sordmm_test.cpp new file mode 100644 index 0000000..d0a5a3a --- /dev/null +++ b/src/sordmm_test.cpp @@ -0,0 +1,25 @@ +/* + Copyright 2011 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "sord/sordmm.hpp" + +int +main(int argc, char** argv) +{ + Sord::World world; + Sord::Model model(world, "http://example.org/"); + return 0; +} diff --git a/src/syntax.c b/src/syntax.c new file mode 100644 index 0000000..2d924b3 --- /dev/null +++ b/src/syntax.c @@ -0,0 +1,207 @@ +/* + Copyright 2011-2015 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "serd/serd.h" + +#include "sord_config.h" +#include "sord_internal.h" + +struct SordInserterImpl { + SordModel* model; + SerdEnv* env; +}; + +SordInserter* +sord_inserter_new(SordModel* model, + SerdEnv* env) +{ + SordInserter* inserter = (SordInserter*)malloc(sizeof(SordInserter)); + inserter->model = model; + inserter->env = env; + return inserter; +} + +void +sord_inserter_free(SordInserter* inserter) +{ + free(inserter); +} + +SerdStatus +sord_inserter_set_base_uri(SordInserter* inserter, + const SerdNode* uri) +{ + return serd_env_set_base_uri(inserter->env, uri); +} + +SerdStatus +sord_inserter_set_prefix(SordInserter* inserter, + const SerdNode* name, + const SerdNode* uri) +{ + return serd_env_set_prefix(inserter->env, name, uri); +} + +SerdStatus +sord_inserter_write_statement(SordInserter* inserter, + SerdStatementFlags flags, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang) +{ + SordWorld* world = sord_get_world(inserter->model); + SerdEnv* env = inserter->env; + + SordNode* g = sord_node_from_serd_node(world, env, graph, NULL, NULL); + SordNode* s = sord_node_from_serd_node(world, env, subject, NULL, NULL); + SordNode* p = sord_node_from_serd_node(world, env, predicate, NULL, NULL); + SordNode* o = sord_node_from_serd_node(world, env, object, + object_datatype, object_lang); + + if (!s || !p || !o) { + return SERD_ERR_BAD_ARG; + } + + const SordQuad tup = { s, p, o, g }; + sord_add(inserter->model, tup); + + sord_node_free(world, o); + sord_node_free(world, p); + sord_node_free(world, s); + sord_node_free(world, g); + + return SERD_SUCCESS; +} + +SORD_API +SerdReader* +sord_new_reader(SordModel* model, + SerdEnv* env, + SerdSyntax syntax, + SordNode* graph) +{ + SordInserter* inserter = sord_inserter_new(model, env); + + SerdReader* reader = serd_reader_new( + syntax, inserter, (void (*)(void* ptr))sord_inserter_free, + (SerdBaseSink)sord_inserter_set_base_uri, + (SerdPrefixSink)sord_inserter_set_prefix, + (SerdStatementSink)sord_inserter_write_statement, + NULL); + + if (graph) { + serd_reader_set_default_graph(reader, sord_node_to_serd_node(graph)); + } + + return reader; +} + +static SerdStatus +write_statement(SordModel* sord, + SerdWriter* writer, + SordQuad tup, + SerdStatementFlags flags) +{ + const SordNode* s = tup[SORD_SUBJECT]; + const SordNode* p = tup[SORD_PREDICATE]; + const SordNode* o = tup[SORD_OBJECT]; + const SordNode* d = sord_node_get_datatype(o); + const SerdNode* ss = sord_node_to_serd_node(s); + const SerdNode* sp = sord_node_to_serd_node(p); + const SerdNode* so = sord_node_to_serd_node(o); + const SerdNode* sd = sord_node_to_serd_node(d); + + const char* lang_str = sord_node_get_language(o); + size_t lang_len = lang_str ? strlen(lang_str) : 0; + SerdNode language = SERD_NODE_NULL; + if (lang_str) { + language.type = SERD_LITERAL; + language.n_bytes = lang_len; + language.n_chars = lang_len; + language.buf = (const uint8_t*)lang_str; + }; + + // TODO: Subject abbreviation + + if (sord_node_is_inline_object(s) && !(flags & SERD_ANON_CONT)) { + return SERD_SUCCESS; + } + + SerdStatus st = SERD_SUCCESS; + if (sord_node_is_inline_object(o)) { + SordQuad sub_pat = { o, 0, 0, 0 }; + SordIter* sub_iter = sord_find(sord, sub_pat); + + SerdStatementFlags start_flags = flags + | ((sub_iter) ? SERD_ANON_O_BEGIN : SERD_EMPTY_O); + + st = serd_writer_write_statement( + writer, start_flags, NULL, ss, sp, so, sd, &language); + + if (!st && sub_iter) { + flags |= SERD_ANON_CONT; + for (; !st && !sord_iter_end(sub_iter); sord_iter_next(sub_iter)) { + SordQuad sub_tup; + sord_iter_get(sub_iter, sub_tup); + st = write_statement(sord, writer, sub_tup, flags); + } + sord_iter_free(sub_iter); + serd_writer_end_anon(writer, so); + } + } else { + st = serd_writer_write_statement( + writer, flags, NULL, ss, sp, so, sd, &language); + } + + return st; +} + +bool +sord_write(SordModel* model, + SerdWriter* writer, + SordNode* graph) +{ + SordQuad pat = { 0, 0, 0, graph }; + SordIter* iter = sord_find(model, pat); + return sord_write_iter(iter, writer); +} + +bool +sord_write_iter(SordIter* iter, + SerdWriter* writer) +{ + if (!iter) { + return false; + } + + SordModel* model = (SordModel*)sord_iter_get_model(iter); + SerdStatus st = SERD_SUCCESS; + for (; !st && !sord_iter_end(iter); sord_iter_next(iter)) { + SordQuad tup; + sord_iter_get(iter, tup); + st = write_statement(model, writer, tup, 0); + } + sord_iter_free(iter); + + return !st; +} diff --git a/src/zix/btree.c b/src/zix/btree.c new file mode 100644 index 0000000..78a5a0d --- /dev/null +++ b/src/zix/btree.c @@ -0,0 +1,740 @@ +/* + Copyright 2011-2014 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "zix/btree.h" + +// #define ZIX_BTREE_DEBUG 1 + +#ifndef ZIX_BTREE_PAGE_SIZE +# define ZIX_BTREE_PAGE_SIZE 4096 +#endif + +#define ZIX_BTREE_NODE_SPACE (ZIX_BTREE_PAGE_SIZE - 2 * sizeof(uint16_t)) +#define ZIX_BTREE_LEAF_VALS ((ZIX_BTREE_NODE_SPACE / sizeof(void*)) - 1) +#define ZIX_BTREE_INODE_VALS (ZIX_BTREE_LEAF_VALS / 2) + +struct ZixBTreeImpl { + ZixBTreeNode* root; + ZixDestroyFunc destroy; + ZixComparator cmp; + void* cmp_data; + size_t size; + unsigned height; ///< Number of levels, i.e. root only has height 1 +}; + +struct ZixBTreeNodeImpl { + uint16_t is_leaf; + uint16_t n_vals; + // On 64-bit we rely on some padding here to get page-sized nodes + void* vals[ZIX_BTREE_INODE_VALS]; // ZIX_BTREE_LEAF_VALS for leaves + ZixBTreeNode* children[ZIX_BTREE_INODE_VALS + 1]; // Nonexistent for leaves +}; + +typedef struct { + ZixBTreeNode* node; + unsigned index; +} ZixBTreeIterFrame; + +struct ZixBTreeIterImpl { + unsigned level; ///< Current level in stack + ZixBTreeIterFrame stack[]; ///< Position stack +}; + +#ifdef ZIX_BTREE_DEBUG + +ZIX_PRIVATE void +print_node(const ZixBTreeNode* n, const char* prefix) +{ + printf("%s[", prefix); + for (uint16_t v = 0; v < n->n_vals; ++v) { + printf(" %lu", (uintptr_t)n->vals[v]); + } + printf(" ]\n"); +} + +ZIX_PRIVATE void +print_tree(const ZixBTreeNode* parent, const ZixBTreeNode* node, int level) +{ + if (node) { + if (!parent) { + printf("TREE {\n"); + } + for (int i = 0; i < level + 1; ++i) { + printf(" "); + } + print_node(node, ""); + if (!node->is_leaf) { + for (uint16_t i = 0; i < node->n_vals + 1; ++i) { + print_tree(node, node->children[i], level + 1); + } + } + if (!parent) { + printf("}\n"); + } + } +} + +#endif // ZIX_BTREE_DEBUG + +ZIX_PRIVATE ZixBTreeNode* +zix_btree_node_new(const bool leaf) +{ + assert(sizeof(ZixBTreeNode) == ZIX_BTREE_PAGE_SIZE); + ZixBTreeNode* node = (ZixBTreeNode*)malloc(sizeof(ZixBTreeNode)); + if (node) { + node->is_leaf = leaf; + node->n_vals = 0; + } + return node; +} + +ZIX_API ZixBTree* +zix_btree_new(const ZixComparator cmp, + void* const cmp_data, + const ZixDestroyFunc destroy) +{ + ZixBTree* t = (ZixBTree*)malloc(sizeof(ZixBTree)); + if (t) { + t->root = zix_btree_node_new(true); + t->destroy = destroy; + t->cmp = cmp; + t->cmp_data = cmp_data; + t->size = 0; + t->height = 1; + if (!t->root) { + free(t); + return NULL; + } + } + return t; +} + +ZIX_PRIVATE void +zix_btree_free_rec(ZixBTree* const t, ZixBTreeNode* const n) +{ + if (n) { + if (t->destroy) { + for (uint16_t i = 0; i < n->n_vals; ++i) { + t->destroy(n->vals[i]); + } + } + if (!n->is_leaf) { + for (uint16_t i = 0; i < n->n_vals + 1; ++i) { + zix_btree_free_rec(t, n->children[i]); + } + } + free(n); + } +} + +ZIX_API void +zix_btree_free(ZixBTree* const t) +{ + if (t) { + zix_btree_free_rec(t, t->root); + free(t); + } +} + +ZIX_API size_t +zix_btree_size(const ZixBTree* const t) +{ + return t->size; +} + +ZIX_PRIVATE uint16_t +zix_btree_max_vals(const ZixBTreeNode* const node) +{ + return node->is_leaf ? ZIX_BTREE_LEAF_VALS : ZIX_BTREE_INODE_VALS; +} + +ZIX_PRIVATE uint16_t +zix_btree_min_vals(const ZixBTreeNode* const node) +{ + return ((zix_btree_max_vals(node) + 1) / 2) - 1; +} + +/** Shift pointers in `array` of length `n` right starting at `i`. */ +ZIX_PRIVATE void +zix_btree_ainsert(void** const array, + const uint16_t n, + const uint16_t i, + void* const e) +{ + memmove(array + i + 1, array + i, (n - i) * sizeof(e)); + array[i] = e; +} + +/** Erase element `i` in `array` of length `n` and return erased element. */ +ZIX_PRIVATE void* +zix_btree_aerase(void** const array, const uint16_t n, const uint16_t i) +{ + void* const ret = array[i]; + memmove(array + i, array + i + 1, (n - i) * sizeof(ret)); + return ret; +} + +/** Split lhs, the i'th child of `n`, into two nodes. */ +ZIX_PRIVATE ZixBTreeNode* +zix_btree_split_child(ZixBTreeNode* const n, + const uint16_t i, + ZixBTreeNode* const lhs) +{ + assert(lhs->n_vals == zix_btree_max_vals(lhs)); + assert(n->n_vals < ZIX_BTREE_INODE_VALS); + assert(i < n->n_vals + 1); + assert(n->children[i] == lhs); + + const uint16_t max_n_vals = zix_btree_max_vals(lhs); + ZixBTreeNode* rhs = zix_btree_node_new(lhs->is_leaf); + if (!rhs) { + return NULL; + } + + // LHS and RHS get roughly half, less the middle value which moves up + lhs->n_vals = max_n_vals / 2; + rhs->n_vals = max_n_vals - lhs->n_vals - 1; + + // Copy large half of values from LHS to new RHS node + memcpy(rhs->vals, + lhs->vals + lhs->n_vals + 1, + rhs->n_vals * sizeof(void*)); + + // Copy large half of children from LHS to new RHS node + if (!lhs->is_leaf) { + memcpy(rhs->children, + lhs->children + lhs->n_vals + 1, + (rhs->n_vals + 1) * sizeof(ZixBTreeNode*)); + } + + // Move middle value up to parent + zix_btree_ainsert(n->vals, n->n_vals, i, lhs->vals[lhs->n_vals]); + + // Insert new RHS node in parent at position i + zix_btree_ainsert((void**)n->children, ++n->n_vals, i + 1, rhs); + + return rhs; +} + +/** Find the first value in `n` that is not less than `e` (lower bound). */ +ZIX_PRIVATE uint16_t +zix_btree_node_find(const ZixBTree* const t, + const ZixBTreeNode* const n, + const void* const e, + bool* const equal) +{ + uint16_t first = 0; + uint16_t len = n->n_vals; + while (len > 0) { + const uint16_t half = len >> 1; + const uint16_t i = first + half; + const int cmp = t->cmp(n->vals[i], e, t->cmp_data); + if (cmp == 0) { + *equal = true; + len = half; // Keep searching for wildcard matches + } else if (cmp < 0) { + const uint16_t chop = half + 1; + first += chop; + len -= chop; + } else { + len = half; + } + } + assert(!*equal || t->cmp(n->vals[first], e, t->cmp_data) == 0); + return first; +} + +ZIX_API ZixStatus +zix_btree_insert(ZixBTree* const t, void* const e) +{ + ZixBTreeNode* parent = NULL; // Parent of n + ZixBTreeNode* n = t->root; // Current node + uint16_t i = 0; // Index of n in parent + while (n) { + if (n->n_vals == zix_btree_max_vals(n)) { + // Node is full, split to ensure there is space for a leaf split + if (!parent) { + // Root is full, grow tree upwards + if (!(parent = zix_btree_node_new(false))) { + return ZIX_STATUS_NO_MEM; + } + t->root = parent; + parent->children[0] = n; + ++t->height; + } + + ZixBTreeNode* const rhs = zix_btree_split_child(parent, i, n); + if (!rhs) { + return ZIX_STATUS_NO_MEM; + } + + const int cmp = t->cmp(parent->vals[i], e, t->cmp_data); + if (cmp == 0) { + return ZIX_STATUS_EXISTS; + } else if (cmp < 0) { + // Move to new RHS + n = rhs; + ++i; + } + } + + assert(!parent || parent->children[i] == n); + + bool equal = false; + i = zix_btree_node_find(t, n, e, &equal); + if (equal) { + return ZIX_STATUS_EXISTS; + } else if (!n->is_leaf) { + // Descend to child node left of value + parent = n; + n = n->children[i]; + } else { + // Insert into internal node + zix_btree_ainsert(n->vals, n->n_vals++, i, e); + break; + } + } + + ++t->size; + + return ZIX_STATUS_SUCCESS; +} + +ZIX_PRIVATE ZixBTreeIter* +zix_btree_iter_new(const ZixBTree* const t) +{ + const size_t s = t->height * sizeof(ZixBTreeIterFrame); + + return (ZixBTreeIter*)calloc(1, sizeof(ZixBTreeIter) + s); +} + +ZIX_PRIVATE void +zix_btree_iter_set_frame(ZixBTreeIter* const ti, + ZixBTreeNode* const n, + const uint16_t i) +{ + if (ti) { + ti->stack[ti->level].node = n; + ti->stack[ti->level].index = i; + } +} + +ZIX_PRIVATE bool +zix_btree_node_is_minimal(ZixBTreeNode* const n) +{ + assert(n->n_vals >= zix_btree_min_vals(n)); + return n->n_vals == zix_btree_min_vals(n); +} + +/** Enlarge left child by stealing a value from its right sibling. */ +ZIX_PRIVATE ZixBTreeNode* +zix_btree_rotate_left(ZixBTreeNode* const parent, const uint16_t i) +{ + ZixBTreeNode* const lhs = parent->children[i]; + ZixBTreeNode* const rhs = parent->children[i + 1]; + + // Move parent value to end of LHS + lhs->vals[lhs->n_vals++] = parent->vals[i]; + + // Move first child pointer from RHS to end of LHS + if (!lhs->is_leaf) { + lhs->children[lhs->n_vals] = (ZixBTreeNode*)zix_btree_aerase( + (void**)rhs->children, rhs->n_vals, 0); + } + + // Move first value in RHS to parent + parent->vals[i] = zix_btree_aerase(rhs->vals, --rhs->n_vals, 0); + + return lhs; +} + +/** Enlarge right child by stealing a value from its left sibling. */ +ZIX_PRIVATE ZixBTreeNode* +zix_btree_rotate_right(ZixBTreeNode* const parent, const uint16_t i) +{ + ZixBTreeNode* const lhs = parent->children[i - 1]; + ZixBTreeNode* const rhs = parent->children[i]; + + // Prepend parent value to RHS + zix_btree_ainsert(rhs->vals, rhs->n_vals++, 0, parent->vals[i - 1]); + + // Move last child pointer from LHS and prepend to RHS + if (!lhs->is_leaf) { + zix_btree_ainsert((void**)rhs->children, + rhs->n_vals, + 0, + lhs->children[lhs->n_vals]); + } + + // Move last value from LHS to parent + parent->vals[i - 1] = lhs->vals[--lhs->n_vals]; + + return rhs; +} + +/** Move n[i] down, merge the left and right child, return the merged node. */ +ZIX_PRIVATE ZixBTreeNode* +zix_btree_merge(ZixBTree* const t, ZixBTreeNode* const n, const uint16_t i) +{ + ZixBTreeNode* const lhs = n->children[i]; + ZixBTreeNode* const rhs = n->children[i + 1]; + + assert(zix_btree_node_is_minimal(n->children[i])); + assert(lhs->n_vals + rhs->n_vals < zix_btree_max_vals(lhs)); + + // Move parent value to end of LHS + lhs->vals[lhs->n_vals++] = zix_btree_aerase(n->vals, n->n_vals, i); + + // Erase corresponding child pointer (to RHS) in parent + zix_btree_aerase((void**)n->children, n->n_vals, i + 1); + + // Add everything from RHS to end of LHS + memcpy(lhs->vals + lhs->n_vals, rhs->vals, rhs->n_vals * sizeof(void*)); + if (!lhs->is_leaf) { + memcpy(lhs->children + lhs->n_vals, + rhs->children, + (rhs->n_vals + 1) * sizeof(void*)); + } + lhs->n_vals += rhs->n_vals; + + if (--n->n_vals == 0) { + // Root is now empty, replace it with its only child + assert(n == t->root); + t->root = lhs; + free(n); + } + + free(rhs); + return lhs; +} + +/** Remove and return the min value from the subtree rooted at `n`. */ +ZIX_PRIVATE void* +zix_btree_remove_min(ZixBTree* const t, ZixBTreeNode* n) +{ + while (!n->is_leaf) { + if (zix_btree_node_is_minimal(n->children[0])) { + // Leftmost child is minimal, must expand + if (!zix_btree_node_is_minimal(n->children[1])) { + // Child's right sibling has at least one key to steal + n = zix_btree_rotate_left(n, 0); + } else { + // Both child and right sibling are minimal, merge + n = zix_btree_merge(t, n, 0); + } + } else { + n = n->children[0]; + } + } + + return zix_btree_aerase(n->vals, --n->n_vals, 0); +} + +/** Remove and return the max value from the subtree rooted at `n`. */ +ZIX_PRIVATE void* +zix_btree_remove_max(ZixBTree* const t, ZixBTreeNode* n) +{ + while (!n->is_leaf) { + if (zix_btree_node_is_minimal(n->children[n->n_vals])) { + // Leftmost child is minimal, must expand + if (!zix_btree_node_is_minimal(n->children[n->n_vals - 1])) { + // Child's left sibling has at least one key to steal + n = zix_btree_rotate_right(n, n->n_vals); + } else { + // Both child and left sibling are minimal, merge + n = zix_btree_merge(t, n, n->n_vals - 1); + } + } else { + n = n->children[n->n_vals]; + } + } + + return n->vals[--n->n_vals]; +} + +ZIX_API ZixStatus +zix_btree_remove(ZixBTree* const t, + const void* const e, + void** const out, + ZixBTreeIter** const next) +{ + ZixBTreeNode* n = t->root; + ZixBTreeIter* ti = NULL; + const bool user_iter = next && *next; + if (next) { + if (!*next && !(*next = zix_btree_iter_new(t))) { + return ZIX_STATUS_NO_MEM; + } + ti = *next; + ti->level = 0; + } + + while (true) { + /* To remove in a single walk down, the tree is adjusted along the way + so that the current node always has at least one more value than the + minimum required in general. Thus, there is always room to remove + without adjusting on the way back up. */ + assert(n == t->root || !zix_btree_node_is_minimal(n)); + + bool equal = false; + const uint16_t i = zix_btree_node_find(t, n, e, &equal); + zix_btree_iter_set_frame(ti, n, i); + if (n->is_leaf) { + if (equal) { + // Found in leaf node + *out = zix_btree_aerase(n->vals, --n->n_vals, i); + if (ti && i == n->n_vals) { + if (i == 0) { + ti->stack[ti->level = 0].node = NULL; + } else { + --ti->stack[ti->level].index; + zix_btree_iter_increment(ti); + } + } + --t->size; + return ZIX_STATUS_SUCCESS; + } else { + // Not found in leaf node, or tree + if (ti && !user_iter) { + zix_btree_iter_free(ti); + *next = NULL; + } + return ZIX_STATUS_NOT_FOUND; + } + } else if (equal) { + // Found in internal node + if (!zix_btree_node_is_minimal(n->children[i])) { + // Left child can remove without merge + *out = n->vals[i]; + n->vals[i] = zix_btree_remove_max(t, n->children[i]); + --t->size; + return ZIX_STATUS_SUCCESS; + } else if (!zix_btree_node_is_minimal(n->children[i + 1])) { + // Right child can remove without merge + *out = n->vals[i]; + n->vals[i] = zix_btree_remove_min(t, n->children[i + 1]); + --t->size; + return ZIX_STATUS_SUCCESS; + } else { + // Both preceding and succeeding child are minimal + n = zix_btree_merge(t, n, i); + } + } else { + // Not found in internal node, key is in/under children[i] + if (zix_btree_node_is_minimal(n->children[i])) { + if (i > 0 && !zix_btree_node_is_minimal(n->children[i - 1])) { + // Steal a key from child's left sibling + n = zix_btree_rotate_right(n, i); + } else if (i < n->n_vals && + !zix_btree_node_is_minimal(n->children[i + 1])) { + // Steal a key from child's right sibling + n = zix_btree_rotate_left(n, i); + } else { + // Both child's siblings are minimal, merge them + if (i < n->n_vals) { + n = zix_btree_merge(t, n, i); + } else { + n = zix_btree_merge(t, n, i - 1); + if (ti) { + --ti->stack[ti->level].index; + } + } + } + } else { + n = n->children[i]; + } + } + if (ti) { + ++ti->level; + } + } + + assert(false); // Not reached + return ZIX_STATUS_ERROR; +} + +ZIX_API ZixStatus +zix_btree_find(const ZixBTree* const t, + const void* const e, + ZixBTreeIter** const ti) +{ + ZixBTreeNode* n = t->root; + if (!(*ti = zix_btree_iter_new(t))) { + return ZIX_STATUS_NO_MEM; + } + + while (n) { + bool equal = false; + const uint16_t i = zix_btree_node_find(t, n, e, &equal); + + zix_btree_iter_set_frame(*ti, n, i); + + if (equal) { + return ZIX_STATUS_SUCCESS; + } else if (n->is_leaf) { + break; + } else { + ++(*ti)->level; + n = n->children[i]; + } + } + + zix_btree_iter_free(*ti); + *ti = NULL; + return ZIX_STATUS_NOT_FOUND; +} + +ZIX_API ZixStatus +zix_btree_lower_bound(const ZixBTree* const t, + const void* const e, + ZixBTreeIter** const ti) +{ + if (!t) { + *ti = NULL; + return ZIX_STATUS_BAD_ARG; + } + + ZixBTreeNode* n = t->root; + bool found = false; + unsigned found_level = 0; + if (!(*ti = zix_btree_iter_new(t))) { + return ZIX_STATUS_NO_MEM; + } + + while (n) { + bool equal = false; + const uint16_t i = zix_btree_node_find(t, n, e, &equal); + + zix_btree_iter_set_frame(*ti, n, i); + + if (equal) { + found_level = (*ti)->level; + found = true; + } + + if (n->is_leaf) { + break; + } else { + ++(*ti)->level; + n = n->children[i]; + assert(n); + } + } + + const ZixBTreeIterFrame* const frame = &(*ti)->stack[(*ti)->level]; + assert(frame->node); + if (frame->index == frame->node->n_vals) { + if (found) { + // Found on a previous level but went too far + (*ti)->level = found_level; + } else { + // Reached end (key is greater than everything in tree) + (*ti)->stack[0].node = NULL; + } + } + + return ZIX_STATUS_SUCCESS; +} + +ZIX_API void* +zix_btree_get(const ZixBTreeIter* const ti) +{ + const ZixBTreeIterFrame* const frame = &ti->stack[ti->level]; + assert(frame->node); + assert(frame->index < frame->node->n_vals); + return frame->node->vals[frame->index]; +} + +ZIX_API ZixBTreeIter* +zix_btree_begin(const ZixBTree* const t) +{ + ZixBTreeIter* const i = zix_btree_iter_new(t); + if (!i) { + return NULL; + } else if (t->size == 0) { + i->stack[0].node = NULL; + } else { + ZixBTreeNode* n = t->root; + i->stack[0].node = n; + i->stack[0].index = 0; + while (!n->is_leaf) { + n = n->children[0]; + ++i->level; + i->stack[i->level].node = n; + i->stack[i->level].index = 0; + } + } + return i; +} + +ZIX_API bool +zix_btree_iter_is_end(const ZixBTreeIter* const i) +{ + return !i || i->stack[0].node == NULL; +} + +ZIX_API void +zix_btree_iter_increment(ZixBTreeIter* const i) +{ + ZixBTreeIterFrame* f = &i->stack[i->level]; + if (f->node->is_leaf) { + // Leaf, move right + assert(f->index < f->node->n_vals); + if (++f->index == f->node->n_vals) { + // Reached end of leaf, move up + f = &i->stack[i->level]; + while (i->level > 0 && f->index == f->node->n_vals) { + f = &i->stack[--i->level]; + assert(f->index <= f->node->n_vals); + } + + if (f->index == f->node->n_vals) { + // Reached end of tree + assert(i->level == 0); + f->node = NULL; + f->index = 0; + } + } + } else { + // Internal node, move down to next child + assert(f->index < f->node->n_vals); + ZixBTreeNode* child = f->node->children[++f->index]; + + f = &i->stack[++i->level]; + f->node = child; + f->index = 0; + + // Move down and left until we hit a leaf + while (!f->node->is_leaf) { + child = f->node->children[0]; + f = &i->stack[++i->level]; + f->node = child; + f->index = 0; + } + } +} + +ZIX_API void +zix_btree_iter_free(ZixBTreeIter* const i) +{ + free(i); +} diff --git a/src/zix/btree.h b/src/zix/btree.h new file mode 100644 index 0000000..91b38cb --- /dev/null +++ b/src/zix/btree.h @@ -0,0 +1,155 @@ +/* + Copyright 2011-2016 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_BTREE_H +#define ZIX_BTREE_H + +#include <stddef.h> + +#include "zix/common.h" + +#ifdef __cplusplus +extern "C" { +#else +# include <stdbool.h> +#endif + +/** + @addtogroup zix + @{ + @name BTree + @{ +*/ + +/** + A B-Tree. +*/ +typedef struct ZixBTreeImpl ZixBTree; + +/** + A B-Tree node (opaque). +*/ +typedef struct ZixBTreeNodeImpl ZixBTreeNode; + +/** + An iterator over a B-Tree. + + Note that modifying the trees invalidates all iterators, so all iterators + are const iterators. +*/ +typedef struct ZixBTreeIterImpl ZixBTreeIter; + +/** + Create a new (empty) B-Tree. +*/ +ZIX_API ZixBTree* +zix_btree_new(ZixComparator cmp, + void* cmp_data, + ZixDestroyFunc destroy); + +/** + Free `t`. +*/ +ZIX_API void +zix_btree_free(ZixBTree* t); + +/** + Return the number of elements in `t`. +*/ +ZIX_API size_t +zix_btree_size(const ZixBTree* t); + +/** + Insert the element `e` into `t`. +*/ +ZIX_API ZixStatus +zix_btree_insert(ZixBTree* t, void* e); + +/** + Remove the value `e` from `t`. + + @param t Tree to remove from. + + @param e Value to remove. + + @param out Set to point to the removed pointer (which may not equal `e`). + + @param next If non-NULL, pointed to the value following `e`. If *next is + also non-NULL, the iterator is reused, otherwise a new one is allocated. To + reuse an iterator, no items may have been added since its creation. +*/ +ZIX_API ZixStatus +zix_btree_remove(ZixBTree* t, const void* e, void** out, ZixBTreeIter** next); + +/** + Set `ti` to an element equal to `e` in `t`. + If no such item exists, `ti` is set to NULL. +*/ +ZIX_API ZixStatus +zix_btree_find(const ZixBTree* t, const void* e, ZixBTreeIter** ti); + +/** + Set `ti` to the smallest element in `t` that is not less than `e`. + + Wildcards are supported, so if the search key `e` compares equal to many + values in the tree, `ti` will be set to the least such element. The search + key `e` is always passed as the second argument to the comparator. +*/ +ZIX_API ZixStatus +zix_btree_lower_bound(const ZixBTree* t, const void* e, ZixBTreeIter** ti); + +/** + Return the data associated with the given tree item. +*/ +ZIX_API void* +zix_btree_get(const ZixBTreeIter* ti); + +/** + Return an iterator to the first (smallest) element in `t`. + + The returned iterator must be freed with zix_btree_iter_free(). +*/ +ZIX_API ZixBTreeIter* +zix_btree_begin(const ZixBTree* t); + +/** + Return true iff `i` is an iterator to the end of its tree. +*/ +ZIX_API bool +zix_btree_iter_is_end(const ZixBTreeIter* i); + +/** + Increment `i` to point to the next element in the tree. +*/ +ZIX_API void +zix_btree_iter_increment(ZixBTreeIter* i); + +/** + Free `i`. +*/ +ZIX_API void +zix_btree_iter_free(ZixBTreeIter* i); + +/** + @} + @} +*/ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_BTREE_H */ diff --git a/src/zix/common.h b/src/zix/common.h new file mode 100644 index 0000000..7e5e2f0 --- /dev/null +++ b/src/zix/common.h @@ -0,0 +1,88 @@ +/* + Copyright 2012 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_COMMON_H +#define ZIX_COMMON_H + +/** + @addtogroup zix + @{ +*/ + +/** @cond */ +#ifdef ZIX_SHARED +# ifdef _WIN32 +# define ZIX_LIB_IMPORT __declspec(dllimport) +# define ZIX_LIB_EXPORT __declspec(dllexport) +# else +# define ZIX_LIB_IMPORT __attribute__((visibility("default"))) +# define ZIX_LIB_EXPORT __attribute__((visibility("default"))) +# endif +# ifdef ZIX_INTERNAL +# define ZIX_API ZIX_LIB_EXPORT +# else +# define ZIX_API ZIX_LIB_IMPORT +# endif +# define ZIX_PRIVATE static +#elif defined(ZIX_INLINE) +# define ZIX_API static inline +# define ZIX_PRIVATE static inline +#else +# define ZIX_API +# define ZIX_PRIVATE static +#endif +/** @endcond */ + +#ifdef __cplusplus +extern "C" { +#else +# include <stdbool.h> +#endif + +typedef enum { + ZIX_STATUS_SUCCESS, + ZIX_STATUS_ERROR, + ZIX_STATUS_NO_MEM, + ZIX_STATUS_NOT_FOUND, + ZIX_STATUS_EXISTS, + ZIX_STATUS_BAD_ARG, + ZIX_STATUS_BAD_PERMS, +} ZixStatus; + +/** + Function for comparing two elements. +*/ +typedef int (*ZixComparator)(const void* a, const void* b, void* user_data); + +/** + Function for testing equality of two elements. +*/ +typedef bool (*ZixEqualFunc)(const void* a, const void* b); + +/** + Function to destroy an element. +*/ +typedef void (*ZixDestroyFunc)(void* ptr); + +/** + @} +*/ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_COMMON_H */ diff --git a/src/zix/digest.c b/src/zix/digest.c new file mode 100644 index 0000000..7d9c035 --- /dev/null +++ b/src/zix/digest.c @@ -0,0 +1,57 @@ +/* + Copyright 2012-2014 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "zix/digest.h" + +#ifdef __SSE4_2__ +# include <smmintrin.h> +#endif + +ZIX_API uint32_t +zix_digest_start(void) +{ +#ifdef __SSE4_2__ + return 1; // CRC32 initial value +#else + return 5381; // DJB hash initial value +#endif +} + +ZIX_API uint32_t +zix_digest_add(uint32_t hash, const void* const buf, const size_t len) +{ + const uint8_t* str = (const uint8_t*)buf; +#ifdef __SSE4_2__ + // SSE 4.2 CRC32 + for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) { + hash = _mm_crc32_u32(hash, *(const uint32_t*)str); + str += sizeof(uint32_t); + } + if (len & sizeof(uint16_t)) { + hash = _mm_crc32_u16(hash, *(const uint16_t*)str); + str += sizeof(uint16_t); + } + if (len & sizeof(uint8_t)) { + hash = _mm_crc32_u8(hash, *(const uint8_t*)str); + } +#else + // Classic DJB hash + for (size_t i = 0; i < len; ++i) { + hash = (hash << 5) + hash + str[i]; + } +#endif + return hash; +} diff --git a/src/zix/digest.h b/src/zix/digest.h new file mode 100644 index 0000000..8c96492 --- /dev/null +++ b/src/zix/digest.h @@ -0,0 +1,39 @@ +/* + Copyright 2012 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_DIGEST_H +#define ZIX_DIGEST_H + +#include <stddef.h> +#include <stdint.h> + +#include "zix/common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +ZIX_API uint32_t +zix_digest_start(void); + +ZIX_API uint32_t +zix_digest_add(uint32_t hash, const void* buf, size_t len); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_DIGEST_H */ diff --git a/src/zix/hash.c b/src/zix/hash.c new file mode 100644 index 0000000..f633e16 --- /dev/null +++ b/src/zix/hash.c @@ -0,0 +1,232 @@ +/* + Copyright 2011-2014 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "zix/hash.h" + +/** + Primes, each slightly less than twice its predecessor, and as far away + from powers of two as possible. +*/ +static const unsigned sizes[] = { + 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, + 196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843, + 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741, 0 +}; + +typedef struct ZixHashEntry { + struct ZixHashEntry* next; ///< Next entry in bucket + uint32_t hash; ///< Non-modulo hash value + // Value follows here (access with zix_hash_value) +} ZixHashEntry; + +struct ZixHashImpl { + ZixHashFunc hash_func; + ZixEqualFunc equal_func; + ZixHashEntry** buckets; + const unsigned* n_buckets; + size_t value_size; + unsigned count; +}; + +static inline void* +zix_hash_value(ZixHashEntry* entry) +{ + return entry + 1; +} + +ZIX_API ZixHash* +zix_hash_new(ZixHashFunc hash_func, + ZixEqualFunc equal_func, + size_t value_size) +{ + ZixHash* hash = (ZixHash*)malloc(sizeof(ZixHash)); + if (hash) { + hash->hash_func = hash_func; + hash->equal_func = equal_func; + hash->n_buckets = &sizes[0]; + hash->value_size = value_size; + hash->count = 0; + if (!(hash->buckets = (ZixHashEntry**)calloc(*hash->n_buckets, + sizeof(ZixHashEntry*)))) { + free(hash); + return NULL; + } + } + return hash; +} + +ZIX_API void +zix_hash_free(ZixHash* hash) +{ + for (unsigned b = 0; b < *hash->n_buckets; ++b) { + ZixHashEntry* bucket = hash->buckets[b]; + for (ZixHashEntry* e = bucket; e;) { + ZixHashEntry* next = e->next; + free(e); + e = next; + } + } + + free(hash->buckets); + free(hash); +} + +ZIX_API size_t +zix_hash_size(const ZixHash* hash) +{ + return hash->count; +} + +static inline void +insert_entry(ZixHashEntry** bucket, ZixHashEntry* entry) +{ + entry->next = *bucket; + *bucket = entry; +} + +static inline ZixStatus +rehash(ZixHash* hash, unsigned new_n_buckets) +{ + ZixHashEntry** new_buckets = (ZixHashEntry**)calloc( + new_n_buckets, sizeof(ZixHashEntry*)); + if (!new_buckets) { + return ZIX_STATUS_NO_MEM; + } + + const unsigned old_n_buckets = *hash->n_buckets; + for (unsigned b = 0; b < old_n_buckets; ++b) { + for (ZixHashEntry* e = hash->buckets[b]; e;) { + ZixHashEntry* const next = e->next; + const unsigned h = e->hash % new_n_buckets; + insert_entry(&new_buckets[h], e); + e = next; + } + } + + free(hash->buckets); + hash->buckets = new_buckets; + + return ZIX_STATUS_SUCCESS; +} + +static inline ZixHashEntry* +find_entry(const ZixHash* hash, + const void* key, + const unsigned h, + const unsigned h_nomod) +{ + for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) { + if (e->hash == h_nomod && hash->equal_func(zix_hash_value(e), key)) { + return e; + } + } + return NULL; +} + +ZIX_API const void* +zix_hash_find(const ZixHash* hash, const void* value) +{ + const unsigned h_nomod = hash->hash_func(value); + const unsigned h = h_nomod % *hash->n_buckets; + ZixHashEntry* const entry = find_entry(hash, value, h, h_nomod); + return entry ? zix_hash_value(entry) : 0; +} + +ZIX_API ZixStatus +zix_hash_insert(ZixHash* hash, const void* value, const void** inserted) +{ + unsigned h_nomod = hash->hash_func(value); + unsigned h = h_nomod % *hash->n_buckets; + + ZixHashEntry* elem = find_entry(hash, value, h, h_nomod); + if (elem) { + assert(elem->hash == h_nomod); + if (inserted) { + *inserted = zix_hash_value(elem); + } + return ZIX_STATUS_EXISTS; + } + + elem = (ZixHashEntry*)malloc(sizeof(ZixHashEntry) + hash->value_size); + if (!elem) { + return ZIX_STATUS_NO_MEM; + } + elem->next = NULL; + elem->hash = h_nomod; + memcpy(elem + 1, value, hash->value_size); + + const unsigned next_n_buckets = *(hash->n_buckets + 1); + if (next_n_buckets != 0 && (hash->count + 1) >= next_n_buckets) { + if (!rehash(hash, next_n_buckets)) { + h = h_nomod % *(++hash->n_buckets); + } + } + + insert_entry(&hash->buckets[h], elem); + ++hash->count; + if (inserted) { + *inserted = zix_hash_value(elem); + } + return ZIX_STATUS_SUCCESS; +} + +ZIX_API ZixStatus +zix_hash_remove(ZixHash* hash, const void* value) +{ + const unsigned h_nomod = hash->hash_func(value); + const unsigned h = h_nomod % *hash->n_buckets; + + ZixHashEntry** next_ptr = &hash->buckets[h]; + for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) { + if (h_nomod == e->hash && + hash->equal_func(zix_hash_value(e), value)) { + *next_ptr = e->next; + free(e); + return ZIX_STATUS_SUCCESS; + } + next_ptr = &e->next; + } + + if (hash->n_buckets != sizes) { + const unsigned prev_n_buckets = *(hash->n_buckets - 1); + if (hash->count - 1 <= prev_n_buckets) { + if (!rehash(hash, prev_n_buckets)) { + --hash->n_buckets; + } + } + } + + --hash->count; + return ZIX_STATUS_NOT_FOUND; +} + +ZIX_API void +zix_hash_foreach(ZixHash* hash, + ZixHashVisitFunc f, + void* user_data) +{ + for (unsigned b = 0; b < *hash->n_buckets; ++b) { + ZixHashEntry* bucket = hash->buckets[b]; + for (ZixHashEntry* e = bucket; e; e = e->next) { + f(zix_hash_value(e), user_data); + } + } +} diff --git a/src/zix/hash.h b/src/zix/hash.h new file mode 100644 index 0000000..db24c45 --- /dev/null +++ b/src/zix/hash.h @@ -0,0 +1,140 @@ +/* + Copyright 2011-2015 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_HASH_H +#define ZIX_HASH_H + +#include <stddef.h> +#include <stdint.h> + +#include "zix/common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @addtogroup zix + @{ + @name Hash + @{ +*/ + +typedef struct ZixHashImpl ZixHash; + +/** + Function for computing the hash of an element. +*/ +typedef uint32_t (*ZixHashFunc)(const void* value); + +/** + Function to visit a hash element. +*/ +typedef void (*ZixHashVisitFunc)(void* value, + void* user_data); + +/** + Create a new hash table. + + To minimize space overhead, unlike many hash tables this stores a single + value, not a key and a value. Any size of value can be stored, but all the + values in the hash table must be the same size, and the values must be safe + to copy with memcpy. To get key:value behaviour, simply insert a struct + with a key and value into the hash. + + @param hash_func The hashing function. + @param equal_func A function to test value equality. + @param value_size The size of the values to be stored. +*/ +ZIX_API ZixHash* +zix_hash_new(ZixHashFunc hash_func, + ZixEqualFunc equal_func, + size_t value_size); + +/** + Free `hash`. +*/ +ZIX_API void +zix_hash_free(ZixHash* hash); + +/** + Return the number of elements in `hash`. +*/ +ZIX_API size_t +zix_hash_size(const ZixHash* hash); + +/** + Insert an item into `hash`. + + If no matching value is found, ZIX_STATUS_SUCCESS will be returned, and @p + inserted will be pointed to the copy of `value` made in the new hash node. + + If a matching value already exists, ZIX_STATUS_EXISTS will be returned, and + `inserted` will be pointed to the existing value. + + @param hash The hash table. + @param value The value to be inserted. + @param inserted The copy of `value` in the hash table. + @return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS, or ZIX_STATUS_NO_MEM. +*/ +ZIX_API ZixStatus +zix_hash_insert(ZixHash* hash, + const void* value, + const void** inserted); + +/** + Remove an item from `hash`. + + @param hash The hash table. + @param value The value to remove. + @return ZIX_STATUS_SUCCES or ZIX_STATUS_NOT_FOUND. +*/ +ZIX_API ZixStatus +zix_hash_remove(ZixHash* hash, + const void* value); + +/** + Search for an item in `hash`. + + @param hash The hash table. + @param value The value to search for. +*/ +ZIX_API const void* +zix_hash_find(const ZixHash* hash, + const void* value); + +/** + Call `f` on each value in `hash`. + + @param hash The hash table. + @param f The function to call on each value. + @param user_data The user_data parameter passed to `f`. +*/ +ZIX_API void +zix_hash_foreach(ZixHash* hash, + ZixHashVisitFunc f, + void* user_data); + +/** + @} + @} +*/ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_HASH_H */ |