From 582bfbe1cb0a6aa833f56c5bd8cf42abe5c5d13a Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sat, 12 May 2018 13:28:47 +0200 Subject: WIP: Add model --- serd/serd.h | 320 +++++++++++++++++++++++++ src/inserter.c | 93 ++++++++ src/inserter.h | 29 +++ src/iter.c | 234 +++++++++++++++++++ src/iter.h | 90 +++++++ src/log.h | 54 +++++ src/model.c | 474 +++++++++++++++++++++++++++++++++++++ src/model.h | 39 ++++ src/node.c | 24 ++ src/node.h | 1 + src/nodes.c | 109 +++++++++ src/nodes.h | 33 +++ src/serdi.c | 47 +++- src/statement.c | 70 ++++++ src/statement.h | 47 ++++ src/string.c | 1 + src/world.c | 2 + src/world.h | 3 + tests/model_test.c | 673 +++++++++++++++++++++++++++++++++++++++++++++++++++++ wscript | 88 ++++--- 20 files changed, 2397 insertions(+), 34 deletions(-) create mode 100644 src/inserter.c create mode 100644 src/inserter.h create mode 100644 src/iter.c create mode 100644 src/iter.h create mode 100644 src/log.h create mode 100644 src/model.c create mode 100644 src/model.h create mode 100644 src/nodes.c create mode 100644 src/nodes.h create mode 100644 src/statement.c create mode 100644 src/statement.h create mode 100644 tests/model_test.c diff --git a/serd/serd.h b/serd/serd.h index f148acb4..e7b47693 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -62,6 +62,33 @@ extern "C" { */ typedef struct SerdWorldImpl SerdWorld; +/** + Model. + + A model is an indexed set of statements. It may be searched using various + patterns depending on which indices are enabled. +*/ +typedef struct SerdModelImpl SerdModel; + +/** + Model Inserter. + + An inserter is used for writing statements to a model using the Serd sink + interface. This makes it simple to write to a model directly using a + SerdReader, or any other code that writes statements to a SerdStatementSink. +*/ +typedef struct SerdInserterImpl SerdInserter; + +/** + Model Iterator. +*/ +typedef struct SerdIterImpl SerdIter; + +/** + Statement. +*/ +typedef struct SerdStatementImpl SerdStatement; + /** Environment. @@ -97,6 +124,7 @@ typedef enum { SERD_ERR_UNKNOWN, /**< Unknown error */ SERD_ERR_BAD_SYNTAX, /**< Invalid syntax */ SERD_ERR_BAD_ARG, /**< Invalid argument */ + SERD_ERR_BAD_ITER, /**< Use of invalidated iterator */ SERD_ERR_NOT_FOUND, /**< Not found */ SERD_ERR_ID_CLASH, /**< Encountered clashing blank node IDs */ SERD_ERR_BAD_CURIE, /**< Invalid CURIE (e.g. prefix does not exist) */ @@ -215,6 +243,40 @@ typedef enum { SERD_HAS_LANGUAGE = 1 << 3 /**< Literal node has language */ } SerdNodeFlag; +/** + Field in a statement. +*/ +typedef enum { + SERD_SUBJECT = 0, /**< Subject */ + SERD_PREDICATE = 1, /**< Predicate ("key") */ + SERD_OBJECT = 2, /**< Object ("value") */ + SERD_GRAPH = 3 /**< Graph ("context") */ +} SerdField; + +/** + Flags for model features. +*/ +typedef enum { + SERD_MODEL_GRAPHS = 1 /**< Support multiple graphs in model. */ +} SerdModelFlag; + +/** + Bitwise OR of SerdModelFlag values. +*/ +typedef uint32_t SerdModelFlags; + +/** + Indexing option. +*/ +typedef enum { + SERD_SPO = 1, /**< Subject, Predicate, Object */ + SERD_SOP = 1 << 1, /**< Subject, Object, Predicate */ + SERD_OPS = 1 << 2, /**< Object, Predicate, Subject */ + SERD_OSP = 1 << 3, /**< Object, Subject, Predicate */ + SERD_PSO = 1 << 4, /**< Predicate, Subject, Object */ + SERD_POS = 1 << 5 /**< Predicate, Object, Subject */ +} SerdIndexOption; + /** Bitwise OR of SerdNodeFlag values. */ @@ -1096,6 +1158,264 @@ SERD_API SerdStatus serd_writer_finish(SerdWriter* writer); +/** + @} + @name Model + @{ +*/ + +/** + Create a new model. + + @param world The world in which to make this model. + + @param indices SerdIndexOption flags (e.g. SERD_SPO|SERD_OPS). Be sure to + enable an index where the most significant node(s) are not variables in your + queries (e.g. to make (? P O) queries, enable either SERD_OPS or SERD_POS). + + @param flags Model options. +*/ +SERD_API +SerdModel* +serd_model_new(SerdWorld* world, unsigned indices, SerdModelFlags flags); + +/** + Close and free `model`. +*/ +SERD_API +void +serd_model_free(SerdModel* model); + +/** + Get the world associated with `model`. +*/ +SERD_API +SerdWorld* +serd_model_get_world(SerdModel* model); + +/** + Return the number of quads stored in `model`. +*/ +SERD_API +size_t +serd_model_num_quads(const SerdModel* model); + +/** + Return an iterator to the start of `model`. +*/ +SERD_API +SerdIter* +serd_model_begin(const SerdModel* model); + +/** + Search for statements by a quad pattern. + @return an iterator to the first match, or NULL if no matches found. +*/ +SERD_API +SerdIter* +serd_model_find(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Search for a single node that matches a pattern. + Exactly one of `s`, `p`, `o` must be NULL. + This function is mainly useful for predicates that only have one value. + The returned node must be freed using serd_node_free(). + @return the first matching node, or NULL if no matches are found. +*/ +SERD_API +const SerdNode* +serd_model_get(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Return true iff a statement exists. +*/ +SERD_API +bool +serd_model_ask(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Return the number of matching statements. +*/ +SERD_API +size_t +serd_model_count(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Add a statement to a model from nodes. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_add(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Add a statement to a model. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_add_statement(SerdModel* model, const SerdStatement* statement); + +/** + Remove a quad from a model via an iterator. + + Calling this function invalidates all iterators on `model` except `iter`. + + @param model The model which `iter` points to. + @param iter Iterator to the element to erase, which is incremented to the + next value on return. +*/ +SERD_API +SerdStatus +serd_model_erase(SerdModel* model, SerdIter* iter); + +/** + @} + @name Inserter + @{ +*/ + +/** + Create an inserter for writing statements to a model. +*/ +SERD_API +SerdInserter* +serd_inserter_new(SerdModel* model, + SerdEnv* env, + const SerdNode* default_graph); + +/** + Free an inserter. +*/ +SERD_API +void +serd_inserter_free(SerdInserter* inserter); + +/** + Return a sink interface that adds statements via `inserter`. +*/ +SERD_API +const SerdSinkInterface* +serd_inserter_get_sink_interface(SerdInserter* inserter); + +/** + @} + @name Statement + @{ +*/ + +/** + Return the given node in `statement`. +*/ +SERD_API +const SerdNode* +serd_statement_get_node(const SerdStatement* statement, SerdField field); + +/** + Return the subject in `statement`. +*/ +SERD_API +const SerdNode* +serd_statement_get_subject(const SerdStatement* statement); + +/** + Return the predicate in `statement`. +*/ +SERD_API +const SerdNode* +serd_statement_get_predicate(const SerdStatement* statement); + +/** + Return the object in `statement`. +*/ +SERD_API +const SerdNode* +serd_statement_get_object(const SerdStatement* statement); + +/** + Return the graph in `statement`. +*/ +SERD_API +const SerdNode* +serd_statement_get_graph(const SerdStatement* statement); + +/** + Return true iff `statement` matches the given pattern. + + The matching rules are the same used for querying: nodes match if they are + equivalent, and NULL acts as a wildcard that matches any node. +*/ +SERD_API +bool +serd_statement_matches(const SerdStatement* statement, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph); + +/** + @} + @name Iteration + @{ +*/ + +/** + Return the statement pointed to by `iter`. +*/ +SERD_API +const SerdStatement* +serd_iter_get(const SerdIter* iter); + +/** + Return the store pointed to by `iter`. +*/ +SERD_API +const SerdModel* +serd_iter_get_model(const SerdIter* iter); + +/** + Increment `iter` to point to the next statement. +*/ +SERD_API +bool +serd_iter_next(SerdIter* iter); + +/** + Return true iff `iter` is at the end of its range. +*/ +SERD_API +bool +serd_iter_end(const SerdIter* iter); + +/** + Free `iter`. +*/ +SERD_API +void +serd_iter_free(SerdIter* iter); + /** @} @} diff --git a/src/inserter.c b/src/inserter.c new file mode 100644 index 00000000..d7059c57 --- /dev/null +++ b/src/inserter.c @@ -0,0 +1,93 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "inserter.h" + +#include + +static SerdStatus +serd_inserter_on_base(SerdInserter* inserter, const SerdNode* uri) +{ + return serd_env_set_base_uri(inserter->env, uri); +} + +static SerdStatus +serd_inserter_on_prefix(SerdInserter* inserter, + const SerdNode* name, + const SerdNode* uri) +{ + return serd_env_set_prefix(inserter->env, name, uri); +} + +static SerdStatus +serd_inserter_on_statement(SerdInserter* inserter, + SerdStatementFlags flags, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object) +{ + if (!inserter || !subject || !predicate || !object) { + return SERD_ERR_BAD_ARG; + } + + SerdNode* g = graph ? serd_env_expand_node(inserter->env, graph) : NULL; + SerdNode* s = serd_env_expand_node(inserter->env, subject); + SerdNode* p = serd_env_expand_node(inserter->env, predicate); + SerdNode* o = serd_env_expand_node(inserter->env, object); + + const SerdStatus st = serd_model_add(inserter->model, + s ? s : subject, + p ? p : predicate, + o ? o : object, + g ? g : inserter->default_graph); + + serd_node_free(g); + serd_node_free(s); + serd_node_free(p); + serd_node_free(o); + + return st > SERD_FAILURE ? st : SERD_SUCCESS; +} + +SerdInserter* +serd_inserter_new(SerdModel* model, SerdEnv* env, const SerdNode* default_graph) +{ + SerdInserter* inserter = (SerdInserter*)calloc(1, sizeof(SerdInserter)); + inserter->model = model; + inserter->env = env; + inserter->default_graph = serd_node_copy(default_graph); + inserter->iface.handle = inserter; + inserter->iface.base = (SerdBaseSink)serd_inserter_on_base; + inserter->iface.prefix = (SerdPrefixSink)serd_inserter_on_prefix; + inserter->iface.statement = (SerdStatementSink)serd_inserter_on_statement; + return inserter; +} + +void +serd_inserter_free(SerdInserter* inserter) +{ + if (inserter) { + serd_node_free(inserter->default_graph); + free(inserter); + } +} + +const SerdSinkInterface* +serd_inserter_get_sink_interface(SerdInserter* inserter) +{ + return &inserter->iface; +} diff --git a/src/inserter.h b/src/inserter.h new file mode 100644 index 00000000..0553f5a4 --- /dev/null +++ b/src/inserter.h @@ -0,0 +1,29 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_INSERTER_H +#define SERD_INSERTER_H + +#include "serd/serd.h" + +struct SerdInserterImpl { + SerdModel* model; + SerdEnv* env; + SerdNode* default_graph; + SerdSinkInterface iface; +}; + +#endif // SERD_INSERTER_H diff --git a/src/iter.c b/src/iter.c new file mode 100644 index 00000000..904f2562 --- /dev/null +++ b/src/iter.c @@ -0,0 +1,234 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "iter.h" + +#include "log.h" +#include "model.h" +#include "statement.h" +#include "world.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include +#include + +static inline bool +serd_iter_forward(SerdIter* iter) +{ + zix_btree_iter_increment(iter->cur); + return zix_btree_iter_is_end(iter->cur); +} + +/** + Seek forward as necessary until `iter` points at a match. + @return true iff iterator reached end of valid range. +*/ +static inline bool +serd_iter_seek_match(SerdIter* iter) +{ + for (iter->end = true; !zix_btree_iter_is_end(iter->cur); + serd_iter_forward(iter)) { + const SerdStatement* s = (const SerdStatement*)zix_btree_get(iter->cur); + if (serd_statement_matches_quad(s, iter->pat)) { + return (iter->end = false); + } + } + return true; +} + +/** + Seek forward as necessary until `iter` points at a match, or the prefix + no longer matches iter->pat. + @return true iff iterator reached end of valid range. +*/ +static inline bool +serd_iter_seek_match_range(SerdIter* iter) +{ + assert(!iter->end); + + do { + const SerdStatement* key = (const SerdStatement*)zix_btree_get(iter->cur); + + if (serd_statement_matches_quad(key, iter->pat)) { + return false; // Found match + } + + for (int i = 0; i < iter->n_prefix; ++i) { + const int idx = orderings[iter->order][i]; + if (!serd_node_pattern_match(key->nodes[idx], iter->pat[idx])) { + iter->end = true; // Reached end of valid range + return true; + } + } + } while (!serd_iter_forward(iter)); + + return (iter->end = true); // Reached end +} + +static bool +check_version(const SerdIter* const iter) +{ + if (iter->version != iter->model->version) { + serd_world_errorf(iter->model->world, + SERD_ERR_BAD_ITER, + "attempt to use invalidated iterator\n"); + return false; + } + return true; +} + +SerdIter* +serd_iter_new(const SerdModel* model, + ZixBTreeIter* cur, + const SerdQuad pat, + SerdOrder order, + SearchMode mode, + int n_prefix) +{ + SerdIter* iter = (SerdIter*)malloc(sizeof(SerdIter)); + iter->model = model; + iter->cur = cur; + iter->order = order; + iter->mode = mode; + iter->n_prefix = n_prefix; + iter->version = model->version; + iter->end = false; + for (int i = 0; i < TUP_LEN; ++i) { + iter->pat[i] = pat[i]; + } + + switch (iter->mode) { + case ALL: + case SINGLE: + case RANGE: + assert(serd_statement_matches_quad( + ((const SerdStatement*)zix_btree_get(iter->cur)), + iter->pat)); + break; + case FILTER_RANGE: serd_iter_seek_match_range(iter); break; + case FILTER_ALL: serd_iter_seek_match(iter); break; + } + +#ifdef SERD_DEBUG_ITER + const SerdStatement* statement = serd_iter_get(iter); + SERD_ITER_LOG("New %p pat=" TUP_FMT " cur=" TUP_FMT " end=%d\n", + (void*)iter, + TUP_FMT_ARGS(pat), + TUP_FMT_ARGS(statement->nodes), + iter->end); +#endif + + return iter; +} + +const SerdModel* +serd_iter_get_model(const SerdIter* iter) +{ + return iter->model; +} + +const SerdStatement* +serd_iter_get(const SerdIter* iter) +{ + return check_version(iter) ? (const SerdStatement*)zix_btree_get(iter->cur) + : NULL; +} + +bool +serd_iter_scan_next(SerdIter* iter) +{ + if (iter->end) { + return true; + } + + const SerdStatement* key = NULL; + if (!iter->end) { + switch (iter->mode) { + case ALL: + // At the end if the cursor is (assigned above) + break; + case SINGLE: + iter->end = true; + SERD_ITER_LOG("%p reached single end\n", (void*)iter); + break; + case RANGE: + SERD_ITER_LOG("%p range next\n", (void*)iter); + // At the end if the MSNs no longer match + key = (const SerdStatement*)zix_btree_get(iter->cur); + assert(key); + for (int i = 0; i < iter->n_prefix; ++i) { + const int idx = orderings[iter->order][i]; + if (!serd_node_pattern_match(key->nodes[idx], iter->pat[idx])) { + iter->end = true; + SERD_ITER_LOG("%p reached non-match end\n", (void*)iter); + break; + } + } + break; + case FILTER_RANGE: + // Seek forward to next match, stopping if prefix changes + serd_iter_seek_match_range(iter); + break; + case FILTER_ALL: + // Seek forward to next match + serd_iter_seek_match(iter); + break; + } + } else { + SERD_ITER_LOG("%p reached index end\n", (void*)iter); + } + + if (iter->end) { + SERD_ITER_LOG("%p Reached end\n", (void*)iter); + return true; + } else { +#ifdef SERD_DEBUG_ITER + SERD_ITER_LOG("%p Increment to " TUP_FMT "\n", + (void*)iter, + TUP_FMT_ARGS(serd_iter_get(iter)->nodes)); +#endif + return false; + } +} + +bool +serd_iter_next(SerdIter* iter) +{ + if (iter->end || !check_version(iter)) { + return true; + } + + iter->end = serd_iter_forward(iter); + return serd_iter_scan_next(iter); +} + +bool +serd_iter_end(const SerdIter* iter) +{ + return !iter || iter->end; +} + +void +serd_iter_free(SerdIter* iter) +{ + SERD_ITER_LOG("%p Free\n", (void*)iter); + if (iter) { + zix_btree_iter_free(iter->cur); + free(iter); + } +} diff --git a/src/iter.h b/src/iter.h new file mode 100644 index 00000000..82ab2119 --- /dev/null +++ b/src/iter.h @@ -0,0 +1,90 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_ITER_H +#define SERD_ITER_H + +#include "statement.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include + +/** Triple ordering */ +typedef enum { + SPO, ///< Subject, Predicate, Object + SOP, ///< Subject, Object, Predicate + OPS, ///< Object, Predicate, Subject + OSP, ///< Object, Subject, Predicate + PSO, ///< Predicate, Subject, Object + POS, ///< Predicate, Object, Subject + GSPO, ///< Graph, Subject, Predicate, Object + GSOP, ///< Graph, Subject, Object, Predicate + GOPS, ///< Graph, Object, Predicate, Subject + GOSP, ///< Graph, Object, Subject, Predicate + GPSO, ///< Graph, Predicate, Subject, Object + GPOS ///< Graph, Predicate, Object, Subject +} SerdOrder; + +/** Mode for searching or iteration */ +typedef enum { + ALL, ///< Iterate over entire store + SINGLE, ///< Iteration over a single element (exact search) + RANGE, ///< Iterate over range with equal prefix + FILTER_RANGE, ///< Iterate over range with equal prefix, filtering + FILTER_ALL ///< Iterate to end of store, filtering +} SearchMode; + +struct SerdIterImpl { + const SerdModel* model; ///< Model being iterated over + ZixBTreeIter* cur; ///< Current DB cursor + SerdQuad pat; ///< Pattern (in ordering order) + SerdOrder order; ///< Store order (which index) + SearchMode mode; ///< Iteration mode + int n_prefix; ///< Prefix for RANGE and FILTER_RANGE + uint64_t version; ///< Model version when iterator was created + bool end; ///< True iff reached end +}; + +#define NUM_ORDERS 12 +#define TUP_LEN 4 + +/** + Quads of indices for each order, from most to least significant + (array indexed by SordOrder) +*/ +static const int orderings[NUM_ORDERS][TUP_LEN] = { + { 0, 1, 2, 3 }, { 0, 2, 1, 3 }, // SPO, SOP + { 2, 1, 0, 3 }, { 2, 0, 1, 3 }, // OPS, OSP + { 1, 0, 2, 3 }, { 1, 2, 0, 3 }, // PSO, POS + { 3, 0, 1, 2 }, { 3, 0, 2, 1 }, // GSPO, GSOP + { 3, 2, 1, 0 }, { 3, 2, 0, 1 }, // GOPS, GOSP + { 3, 1, 0, 2 }, { 3, 1, 2, 0 } // GPSO, GPOS +}; + +SerdIter* serd_iter_new(const SerdModel* model, + ZixBTreeIter* cur, + const SerdQuad pat, + SerdOrder order, + SearchMode mode, + int n_prefix); + +bool serd_iter_scan_next(SerdIter* iter); + +bool serd_quad_match(const SerdQuad x, const SerdQuad y); + +#endif // SERD_ITER_H diff --git a/src/log.h b/src/log.h new file mode 100644 index 00000000..368f34b5 --- /dev/null +++ b/src/log.h @@ -0,0 +1,54 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_LOG_H +#define SERD_LOG_H + +#ifdef SERD_DEBUG_SEARCH +/** String name of each ordering (array indexed by SordOrder) */ +static const char* const order_names[] = { + "spo", "sop", "ops", "osp", "pso", "pos", + "gspo", "gsop", "gops", "gosp", "gpso", "gpos" +}; +#endif + +#define TUP_FMT "(%s %s %s %s)" +#define TUP_FMT_ELEM(e) ((e) ? serd_node_get_string(e) : "*") +#define TUP_FMT_ARGS(t) \ + TUP_FMT_ELEM((t)[0]), \ + TUP_FMT_ELEM((t)[1]), \ + TUP_FMT_ELEM((t)[2]), \ + TUP_FMT_ELEM((t)[3]) + +#define SERD_LOG(prefix, ...) fprintf(stderr, "serd." prefix ": " __VA_ARGS__) + +#ifdef SERD_DEBUG_ITER +# define SERD_ITER_LOG(...) SERD_LOG("iter", __VA_ARGS__) +#else +# define SERD_ITER_LOG(...) +#endif +#ifdef SERD_DEBUG_SEARCH +# define SERD_FIND_LOG(...) SERD_LOG("search", __VA_ARGS__) +#else +# define SERD_FIND_LOG(...) +#endif +#ifdef SERD_DEBUG_WRITE +# define SERD_WRITE_LOG(...) SERD_LOG("write", __VA_ARGS__) +#else +# define SERD_WRITE_LOG(...) +#endif + +#endif // SERD_LOG_H diff --git a/src/model.c b/src/model.c new file mode 100644 index 00000000..43b815dd --- /dev/null +++ b/src/model.c @@ -0,0 +1,474 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "model.h" + +#include "iter.h" +#include "log.h" +#include "node.h" +#include "nodes.h" +#include "statement.h" +#include "world.h" + +#include "zix/btree.h" + +#include +#include +#include +#include + +#define DEFAULT_ORDER SPO +#define DEFAULT_GRAPH_ORDER GSPO + +/** + Compare quads lexicographically, ignoring graph. + + NULL IDs (equal to 0) are treated as wildcards, always less than every + other possible ID, except itself. +*/ +static int +serd_triple_compare(const void* x_ptr, const void* y_ptr, void* user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const x = (const SerdStatement*)x_ptr; + const SerdStatement* const y = (const SerdStatement*)y_ptr; + + for (int i = 0; i < TUP_LEN - 1; ++i) { + const int idx = ordering[i]; + assert(idx != SERD_GRAPH); + + const int cmp = serd_node_compare(x->nodes[idx], y->nodes[idx], true); + if (cmp) { + return cmp; + } + } + + return 0; +} + +/** + Compare quads lexicographically, with exact (non-wildcard) graph matching. +*/ +static int +serd_quad_compare(const void* x_ptr, const void* y_ptr, void* user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const x = (const SerdStatement*)x_ptr; + const SerdStatement* const y = (const SerdStatement*)y_ptr; + + // Compare graph without wildcard matching + const int gcmp = serd_node_compare( + x->nodes[SERD_GRAPH], y->nodes[SERD_GRAPH], false); + if (gcmp) { + return gcmp; + } + + // Compare triple fields in appropriate order with wildcard matching + for (int i = 0; i < TUP_LEN; ++i) { + const int idx = ordering[i]; + if (idx != SERD_GRAPH) { + const int cmp = + serd_node_compare(x->nodes[idx], y->nodes[idx], true); + if (cmp) { + return cmp; + } + } + } + + return 0; +} + +/** + Return true iff `serd` has an index for `order`. + If `graphs` is true, `order` will be modified to be the + corresponding order with a G prepended (so G will be the MSN). +*/ +static inline bool +serd_model_has_index(SerdModel* model, + SerdOrder* order, + int* n_prefix, + bool graphs) +{ + if (graphs) { + *order = (SerdOrder)(*order + GSPO); + *n_prefix += 1; + } + + return model->indices[*order]; +} + +/** + Return the best available index for a pattern. + @param pat Pattern in standard (S P O G) order + @param mode Set to the (best) iteration mode for iterating over results + @param n_prefix Set to the length of the range prefix + (for `mode` == RANGE and `mode` == FILTER_RANGE) +*/ +static inline SerdOrder +serd_model_best_index(SerdModel* model, + const SerdQuad pat, + SearchMode* mode, + int* n_prefix) +{ + const bool graph_search = (pat[SERD_GRAPH] != 0); + + const unsigned sig = ((pat[0] ? 1 : 0) * 0x100 + + (pat[1] ? 1 : 0) * 0x010 + + (pat[2] ? 1 : 0) * 0x001); + + SerdOrder good[2] = { (SerdOrder)-1, (SerdOrder)-1 }; + +#define PAT_CASE(sig, m, g0, g1, np) \ + case sig: \ + *mode = m; \ + good[0] = g0; \ + good[1] = g1; \ + *n_prefix = np; \ + break + + // Good orderings that don't require filtering + *mode = RANGE; + *n_prefix = 0; + switch (sig) { + case 0x000: + assert(graph_search); + *mode = RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + case 0x111: + *mode = SINGLE; + return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER; + + PAT_CASE(0x001, RANGE, OPS, OSP, 1); + PAT_CASE(0x010, RANGE, POS, PSO, 1); + PAT_CASE(0x011, RANGE, OPS, POS, 2); + PAT_CASE(0x100, RANGE, SPO, SOP, 1); + PAT_CASE(0x101, RANGE, SOP, OSP, 2); + PAT_CASE(0x110, RANGE, SPO, PSO, 2); + } + + if (*mode == RANGE) { + if (serd_model_has_index(model, &good[0], n_prefix, graph_search)) { + return good[0]; + } else if (serd_model_has_index( + model, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + // Not so good orderings that require filtering, but can + // still be constrained to a range + switch (sig) { + PAT_CASE(0x011, FILTER_RANGE, OSP, PSO, 1); + PAT_CASE(0x101, FILTER_RANGE, SPO, OPS, 1); + // SPO is always present, so 0x110 is never reached here + default: break; + } + + if (*mode == FILTER_RANGE) { + if (serd_model_has_index(model, &good[0], n_prefix, graph_search)) { + return good[0]; + } else if (serd_model_has_index( + model, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + if (graph_search) { + *mode = FILTER_RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + } else { + *mode = FILTER_ALL; + return DEFAULT_ORDER; + } +} + +SerdModel* +serd_model_new(SerdWorld* world, unsigned indices, SerdModelFlags flags) +{ + SerdModel* model = (SerdModel*)calloc(1, sizeof(struct SerdModelImpl)); + model->world = world; + + indices |= SERD_SPO; + + for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) { + const int* const ordering = orderings[i]; + const int* const g_ordering = orderings[i + (NUM_ORDERS / 2)]; + + if (indices & (1 << i)) { + model->indices[i] = + zix_btree_new((ZixComparator)serd_triple_compare, + (const void*)ordering, + NULL); + if (flags & SERD_MODEL_GRAPHS) { + model->indices[i + (NUM_ORDERS / 2)] = + zix_btree_new((ZixComparator)serd_quad_compare, + (const void*)g_ordering, + NULL); + } + } + } + + return model; +} + +static void +serd_model_drop_statement(SerdModel* model, SerdStatement* statement) + +{ + for (int i = 0; i < TUP_LEN; ++i) { + if (statement->nodes[i]) { + serd_nodes_deref(model->world->nodes, statement->nodes[i]); + } + } + + free(statement); +} + +void +serd_model_free(SerdModel* model) +{ + if (!model) { + return; + } + + // Free quads + ZixBTree* index = model->indices[model->indices[DEFAULT_GRAPH_ORDER] + ? DEFAULT_GRAPH_ORDER + : DEFAULT_ORDER]; + ZixBTreeIter* t = zix_btree_begin(index); + for (; !zix_btree_iter_is_end(t); zix_btree_iter_increment(t)) { + free(zix_btree_get(t)); + } + zix_btree_iter_free(t); + + // Free indices + for (unsigned o = 0; o < NUM_ORDERS; ++o) { + if (model->indices[o]) { + zix_btree_free(model->indices[o]); + } + } + + free(model); +} + +SerdWorld* +serd_model_get_world(SerdModel* model) +{ + return model->world; +} + +size_t +serd_model_num_quads(const SerdModel* model) +{ + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + return zix_btree_size(model->indices[order]); +} + +SerdIter* +serd_model_begin(const SerdModel* model) +{ + if (serd_model_num_quads(model) == 0) { + return NULL; + } else { + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + ZixBTreeIter* cur = zix_btree_begin(model->indices[order]); + SerdQuad pat = { 0, 0, 0, 0 }; + return serd_iter_new(model, cur, pat, order, ALL, 0); + } +} + +SerdIter* +serd_model_find(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + const SerdQuad pat = { s, p, o, g }; + if (!pat[0] && !pat[1] && !pat[2] && !pat[3]) { + return serd_model_begin(model); + } + + SearchMode mode; + int n_prefix; + const SerdOrder index_order = + serd_model_best_index(model, pat, &mode, &n_prefix); + + SERD_FIND_LOG("Find " TUP_FMT " index=%s mode=%d n_prefix=%d\n", + TUP_FMT_ARGS(pat), + order_names[index_order], + mode, + n_prefix); + + if (pat[0] && pat[1] && pat[2] && pat[3]) { + mode = SINGLE; // No duplicate quads (Serd is a set) + } + + ZixBTree* const db = model->indices[index_order]; + ZixBTreeIter* cur = NULL; + zix_btree_lower_bound(db, pat, &cur); + if (zix_btree_iter_is_end(cur)) { + SERD_FIND_LOG("No match found, iterator at end\n"); + zix_btree_iter_free(cur); + return NULL; + } + const SerdStatement* const key = (const SerdStatement*)zix_btree_get(cur); + if (!key || ((mode == RANGE || mode == SINGLE) && + !serd_statement_matches_quad(key, pat))) { + SERD_FIND_LOG("No match found, cursor at " TUP_FMT "\n", + TUP_FMT_ARGS(key->nodes)); + + zix_btree_iter_free(cur); + return NULL; + } + + return serd_iter_new(model, cur, pat, index_order, mode, n_prefix); +} + +const SerdNode* +serd_model_get(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if ((bool)s + (bool)p + (bool)o != 2) { + return NULL; + } + + SerdIter* const i = serd_model_find(model, s, p, o, g); + if (i) { + const SerdStatement* statement = serd_iter_get(i); + serd_iter_free(i); + + if (!s) { + return serd_statement_get_subject(statement); + } else if (!p) { + return serd_statement_get_predicate(statement); + } else if (!o) { + return serd_statement_get_object(statement); + } + } + + return NULL; +} + +uint64_t +serd_model_count(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdIter* i = serd_model_find(model, s, p, o, g); + uint64_t n = 0; + for (; !serd_iter_end(i); serd_iter_next(i)) { + ++n; + } + serd_iter_free(i); + return n; +} + +bool +serd_model_ask(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdIter* iter = serd_model_find(model, s, p, o, g); + bool ret = (iter != NULL); + serd_iter_free(iter); + return ret; +} + +SerdStatus +serd_model_add(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if (!s || !p || !o) { + return serd_world_errorf(model->world, + SERD_ERR_BAD_ARG, + "attempt to add statement with NULL field\n"); + } + + SerdStatement* statement = (SerdStatement*)malloc(sizeof(SerdStatement)); + statement->nodes[0] = serd_nodes_intern(model->world->nodes, s); + statement->nodes[1] = serd_nodes_intern(model->world->nodes, p); + statement->nodes[2] = serd_nodes_intern(model->world->nodes, o); + statement->nodes[3] = serd_nodes_intern(model->world->nodes, g); + SERD_WRITE_LOG("Add " TUP_FMT "\n", TUP_FMT_ARGS(statement->nodes)); + + bool added = false; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (model->indices[i]) { + if (!zix_btree_insert(model->indices[i], statement)) { + added = true; + } else if (i == GSPO) { + break; // Tuple already indexed + } + } + } + + ++model->version; + if (added) { + return SERD_SUCCESS; + } + + serd_model_drop_statement(model, statement); + return SERD_FAILURE; +} + +SerdStatus +serd_model_add_statement(SerdModel* model, const SerdStatement* statement) +{ + return serd_model_add(model, + serd_statement_get_subject(statement), + serd_statement_get_predicate(statement), + serd_statement_get_object(statement), + serd_statement_get_graph(statement)); +} + +SerdStatus +serd_model_erase(SerdModel* model, SerdIter* iter) +{ + const SerdStatement* statement = serd_iter_get(iter); + + SERD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(statement->nodes)); + + SerdStatement* removed = NULL; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (model->indices[i]) { + zix_btree_remove(model->indices[i], + statement, + (void**)&removed, + i == iter->order ? &iter->cur : NULL); + } + } + } + iter->end = zix_btree_iter_is_end(iter->cur); + serd_iter_scan_next(iter); + + serd_model_drop_statement(model, removed); + iter->version = ++model->version; + + return SERD_SUCCESS; +} diff --git a/src/model.h b/src/model.h new file mode 100644 index 00000000..28cac0cf --- /dev/null +++ b/src/model.h @@ -0,0 +1,39 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_MODEL_H +#define SERD_MODEL_H + +#include "serd/serd.h" +#include "zix/btree.h" + +#include +#include + +#define SERD_NUM_ORDERS 12 + +struct SerdModelImpl { + SerdWorld* world; + + /** Index for each possible triple ordering (may or may not exist). + * Each index is a tree of SordQuad with the appropriate ordering. + */ + ZixBTree* indices[SERD_NUM_ORDERS]; + + uint64_t version; +}; + +#endif // SERD_MODEL_H diff --git a/src/node.c b/src/node.c index 523893df..32852dc1 100644 --- a/src/node.c +++ b/src/node.c @@ -247,6 +247,30 @@ serd_node_equals(const SerdNode* a, const SerdNode* b) return false; } +/** Compare nodes, considering NULL a match iff `wildcards` is true. */ +int +serd_node_compare(const SerdNode* a, const SerdNode* b, bool wildcards) +{ + if (a == b) { + return 0; + } else if (!a || !b) { + return wildcards ? 0 : (a < b) ? -1 : (a > b) ? 1 : 0; + } else if (a->type != b->type) { + return a->type - b->type; + } + + int cmp = strcmp(serd_node_get_string(a), serd_node_get_string(b)); + if (cmp) { + return cmp; + } + + cmp = serd_node_compare(serd_node_maybe_get_meta_c(a), + serd_node_maybe_get_meta_c(b), + false); + + return cmp; +} + static size_t serd_uri_string_length(const SerdURI* uri) { diff --git a/src/node.h b/src/node.h index 91202db1..004d617b 100644 --- a/src/node.h +++ b/src/node.h @@ -44,5 +44,6 @@ SerdNode* serd_node_malloc(size_t n_bytes, SerdNodeFlags flags, SerdType type); void serd_node_set(SerdNode** dst, const SerdNode* src); size_t serd_node_total_size(const SerdNode* node); SerdNode* serd_node_new_resolved_uri_i(const char* str, const SerdURI* base); +int serd_node_compare(const SerdNode* a, const SerdNode* b, bool wildcards); #endif // SERD_NODE_H diff --git a/src/nodes.c b/src/nodes.c new file mode 100644 index 00000000..169ca4ca --- /dev/null +++ b/src/nodes.c @@ -0,0 +1,109 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "nodes.h" + +#include "node.h" + +#include "zix/common.h" +#include "zix/digest.h" + +#include +#include + +typedef struct { + size_t refs; + SerdNode* node; +} NodesEntry; + +typedef struct { + size_t refs; + const SerdNode* node; +} NodesSearchKey; + +static uint32_t +nodes_hash(const void* n) +{ + const SerdNode* node = ((const NodesEntry*)n)->node; + uint32_t hash = zix_digest_start(); + hash = zix_digest_add(hash, node, serd_node_total_size(node)); + return hash; +} + +static bool +nodes_equal(const void* a, const void* b) +{ + const SerdNode* a_node = ((const NodesEntry*)a)->node; + const SerdNode* b_node = ((const NodesEntry*)b)->node; + const size_t a_size = serd_node_total_size(a_node); + const size_t b_size = serd_node_total_size(b_node); + return ((a_node == b_node) || + (a_size == b_size && !memcmp(a_node, b_node, a_size))); +} + +static void +free_entry(void* value, void* user_data) +{ + NodesEntry* entry = (NodesEntry*)value; + serd_node_free(entry->node); +} + +SerdNodes* +serd_nodes_new(void) +{ + return zix_hash_new(nodes_hash, nodes_equal, sizeof(NodesEntry)); +} + +void +serd_nodes_free(SerdNodes* nodes) +{ + zix_hash_foreach(nodes, free_entry, nodes); + zix_hash_free(nodes); +} + +SerdNode* +serd_nodes_intern(SerdNodes* nodes, const SerdNode* node) +{ + if (!node) { + return NULL; + } + + NodesSearchKey key = { 1, node }; + NodesEntry* inserted = NULL; + switch (zix_hash_insert(nodes, &key, (const void**)&inserted)) { + case ZIX_STATUS_EXISTS: + assert(serd_node_equals(inserted->node, node)); + ++inserted->refs; + break; + case ZIX_STATUS_SUCCESS: + inserted->node = serd_node_copy(node); + default: break; + } + + return inserted ? inserted->node : NULL; +} + +void +serd_nodes_deref(SerdNodes* nodes, const SerdNode* node) +{ + NodesSearchKey key = { 1, node }; + NodesEntry* entry = (NodesEntry*)zix_hash_find(nodes, &key); + if (entry && --entry->refs == 0) { + SerdNode* const intern_node = entry->node; + zix_hash_remove(nodes, entry); + serd_node_free(intern_node); + } +} diff --git a/src/nodes.h b/src/nodes.h new file mode 100644 index 00000000..43e6df28 --- /dev/null +++ b/src/nodes.h @@ -0,0 +1,33 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_NODES_H +#define SERD_NODES_H + +#include "serd/serd.h" +#include "zix/hash.h" + +typedef ZixHash SerdNodes; + +SerdNodes* serd_nodes_new(void); + +void serd_nodes_free(SerdNodes* nodes); + +SerdNode* serd_nodes_intern(SerdNodes* nodes, const SerdNode* node); + +void serd_nodes_deref(SerdNodes* nodes, const SerdNode* node); + +#endif // SERD_NODES_H diff --git a/src/serdi.c b/src/serdi.c index 6efce26e..1f12812b 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -94,6 +94,7 @@ print_usage(const char* name, bool error) fprintf(os, " -h Display this help and exit.\n"); fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); fprintf(os, " -l Lax (non-strict) parsing.\n"); + fprintf(os, " -m Build and serialise a model (no streaming).\n"); fprintf(os, " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"); fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); fprintf(os, " -q Suppress all output except data.\n"); @@ -132,6 +133,7 @@ main(int argc, char** argv) bool bulk_write = false; bool full_uris = false; bool lax = false; + bool use_model = false; bool quiet = false; const char* add_prefix = NULL; const char* chop_prefix = NULL; @@ -153,6 +155,8 @@ main(int argc, char** argv) return print_usage(argv[0], false); } else if (argv[a][1] == 'l') { lax = true; + } else if (argv[a][1] == 'm') { + use_model = true; } else if (argv[a][1] == 'q') { quiet = true; } else if (argv[a][1] == 'v') { @@ -205,11 +209,11 @@ main(int argc, char** argv) input_syntax = SERD_TRIG; } + const bool input_has_graphs = + input_syntax == SERD_NQUADS || input_syntax == SERD_TRIG; + if (!output_syntax) { - output_syntax = ( - (input_syntax == SERD_TURTLE || input_syntax == SERD_NTRIPLES) - ? SERD_NTRIPLES - : SERD_NQUADS); + output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES; } SerdNode* base = NULL; @@ -247,9 +251,19 @@ main(int argc, char** argv) world, output_syntax, (SerdStyle)output_style, env, serd_file_sink, out_fd); - SerdReader* reader = serd_reader_new( - world, input_syntax, serd_writer_get_sink_interface(writer)); + SerdReader* reader = NULL; + SerdModel* model = NULL; + SerdInserter* inserter = NULL; + const SerdSinkInterface* sink = NULL; + if (use_model) { + model = serd_model_new(world, SERD_SPO, input_has_graphs); + inserter = serd_inserter_new(model, env, NULL); + sink = serd_inserter_get_sink_interface(inserter); + } else { + sink = serd_writer_get_sink_interface(writer); + } + reader = serd_reader_new(world, input_syntax, sink); serd_reader_set_strict(reader, !lax); if (quiet) { serd_world_set_error_sink(world, quiet_error_sink, NULL); @@ -285,6 +299,27 @@ main(int argc, char** argv) serd_reader_end_stream(reader); + if (status <= SERD_FAILURE && use_model) { + const SerdSinkInterface* wsink = serd_writer_get_sink_interface(writer); + serd_env_foreach(env, wsink->prefix, writer); + + SerdIter* iter = serd_model_begin(model); + for (; !serd_iter_end(iter); serd_iter_next(iter)) { + const SerdStatement* s = serd_iter_get(iter); + if ((status = wsink->statement(wsink->handle, + 0, + serd_statement_get_graph(s), + serd_statement_get_subject(s), + serd_statement_get_predicate(s), + serd_statement_get_object(s)))) { + break; + } + } + serd_iter_free(iter); + } + + serd_inserter_free(inserter); + serd_model_free(model); serd_reader_free(reader); serd_writer_finish(writer); serd_writer_free(writer); diff --git a/src/statement.c b/src/statement.c new file mode 100644 index 00000000..d6d1eef8 --- /dev/null +++ b/src/statement.c @@ -0,0 +1,70 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "statement.h" + +const SerdNode* +serd_statement_get_node(const SerdStatement* statement, SerdField field) +{ + return statement->nodes[field]; +} + +const SerdNode* +serd_statement_get_subject(const SerdStatement* statement) +{ + return statement->nodes[SERD_SUBJECT]; +} + +const SerdNode* +serd_statement_get_predicate(const SerdStatement* statement) +{ + return statement->nodes[SERD_PREDICATE]; +} + +const SerdNode* +serd_statement_get_object(const SerdStatement* statement) +{ + return statement->nodes[SERD_OBJECT]; +} + +const SerdNode* +serd_statement_get_graph(const SerdStatement* statement) +{ + return statement->nodes[SERD_GRAPH]; +} + +bool +serd_statement_matches(const SerdStatement* statement, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph) + +{ + return (serd_node_pattern_match(statement->nodes[0], subject) && + serd_node_pattern_match(statement->nodes[1], predicate) && + serd_node_pattern_match(statement->nodes[2], object) && + serd_node_pattern_match(statement->nodes[3], graph)); +} + +bool +serd_statement_matches_quad(const SerdStatement* statement, const SerdQuad quad) +{ + return serd_node_pattern_match(statement->nodes[0], quad[0]) && + serd_node_pattern_match(statement->nodes[1], quad[1]) && + serd_node_pattern_match(statement->nodes[2], quad[2]) && + serd_node_pattern_match(statement->nodes[3], quad[3]); +} diff --git a/src/statement.h b/src/statement.h new file mode 100644 index 00000000..20b0ad0c --- /dev/null +++ b/src/statement.h @@ -0,0 +1,47 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_STATEMENT_H +#define SERD_STATEMENT_H + +#include "serd/serd.h" + +#include + +#include "serd/serd.h" + +/** + Quad of nodes (a statement), or a quad pattern. + + Nodes are ordered (S P O G). The ID of the default graph is 0. +*/ +typedef const SerdNode* SerdQuad[4]; + +struct SerdStatementImpl { + SerdQuad nodes; +}; + +static inline bool +serd_node_pattern_match(const SerdNode* a, const SerdNode* b) +{ + return !a || !b || (a == b) || serd_node_equals(a, b); +} + +bool +serd_statement_matches_quad(const SerdStatement* statement, + const SerdQuad quad); + +#endif // SERD_STATEMENT_H diff --git a/src/string.c b/src/string.c index 1755d48a..3ca5bd98 100644 --- a/src/string.c +++ b/src/string.c @@ -31,6 +31,7 @@ serd_strerror(SerdStatus status) case SERD_ERR_UNKNOWN: return "Unknown error"; case SERD_ERR_BAD_SYNTAX: return "Invalid syntax"; case SERD_ERR_BAD_ARG: return "Invalid argument"; + case SERD_ERR_BAD_ITER: return "Invalid iterator"; case SERD_ERR_NOT_FOUND: return "Not found"; case SERD_ERR_ID_CLASH: return "Blank node ID clash"; case SERD_ERR_BAD_CURIE: return "Invalid CURIE"; diff --git a/src/world.c b/src/world.c index 9d364e04..fbe9da3f 100644 --- a/src/world.c +++ b/src/world.c @@ -77,6 +77,7 @@ serd_world_new(void) SerdWorld* world = (SerdWorld*)calloc(1, sizeof(SerdWorld)); world->blank_node = serd_node_new_blank("b0000000000"); + world->nodes = serd_nodes_new(); return world; } @@ -84,6 +85,7 @@ serd_world_new(void) void serd_world_free(SerdWorld* world) { + serd_nodes_free(world->nodes); free(world); } diff --git a/src/world.h b/src/world.h index e3bd3784..a6722a0c 100644 --- a/src/world.h +++ b/src/world.h @@ -19,9 +19,12 @@ #include "serd/serd.h" +#include "nodes.h" + #include struct SerdWorldImpl { + SerdNodes* nodes; SerdErrorSink error_sink; void* error_handle; uint32_t next_blank_id; diff --git a/tests/model_test.c b/tests/model_test.c new file mode 100644 index 00000000..7ecd4eb2 --- /dev/null +++ b/tests/model_test.c @@ -0,0 +1,673 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include +#include +#include +#include + +#include "serd/serd.h" + +#include "../src/log.h" +#include "test_utils.h" + +#define WILDCARD_NODE NULL + +static const unsigned n_objects_per = 2; + +typedef const SerdNode* Quad[4]; + +typedef struct +{ + Quad query; + int expected_num_results; +} QueryTest; + +static SerdNode* +uri(SerdWorld* world, int num) +{ + char str[] = "eg:000"; + snprintf(str + 3, 4, "%03d", num); + return serd_node_new_uri(str); +} + +static int +generate(SerdWorld* world, SerdModel* model, size_t n_quads, SerdNode* graph) +{ + for (size_t i = 0; i < n_quads; ++i) { + int num = (i * n_objects_per) + 1; + + SerdNode* ids[2 + n_objects_per]; + for (unsigned j = 0; j < 2 + n_objects_per; ++j) { + ids[j] = uri(world, num++); + } + + for (unsigned j = 0; j < n_objects_per; ++j) { + if (serd_model_add(model, ids[0], ids[1], ids[2 + j], graph)) { + FAIL("Failed to add quad\n"); + } + } + + for (unsigned j = 0; j < 2 + n_objects_per; ++j) { + serd_node_free(ids[j]); + } + } + + // Add some literals + + // (98 4 "hello") and (98 4 "hello"^^<5>) + SerdNode* hello = serd_node_new_literal("hello", NULL, NULL); + SerdNode* hello_gb = serd_node_new_literal("hello", NULL, "en-gb"); + SerdNode* hello_us = serd_node_new_literal("hello", NULL, "en-us"); + SerdNode* hello_t4 = serd_node_new_literal("hello", uri(world, 4), NULL); + SerdNode* hello_t5 = serd_node_new_literal("hello", uri(world, 5), NULL); + if (serd_model_add(model, uri(world, 98), uri(world, 4), hello, graph)) { + FAIL("Failed to add untyped literal\n"); + } + if (serd_model_add(model, uri(world, 98), uri(world, 4), hello_t5, graph)) { + FAIL("Failed to add typed literal\n"); + } + + // (96 4 "hello"^^<4>) and (96 4 "hello"^^<5>) + if (serd_model_add(model, uri(world, 96), uri(world, 4), hello_t4, graph)) { + FAIL("Failed to add typed literal\n"); + } + if (serd_model_add(model, uri(world, 96), uri(world, 4), hello_t5, graph)) { + FAIL("Failed to add typed literal\n"); + } + + // (94 5 "hello") and (94 5 "hello"@en-gb) + if (serd_model_add(model, uri(world, 94), uri(world, 5), hello, graph)) { + FAIL("Failed to add literal\n"); + } + if (serd_model_add(model, uri(world, 94), uri(world, 5), hello_gb, graph)) { + FAIL("Failed to add literal with language\n"); + } + + // (92 6 "hello"@en-us) and (92 6 "hello"@en-gb) + if (serd_model_add(model, uri(world, 92), uri(world, 6), hello_us, graph)) { + FAIL("Failed to add literal with language\n"); + } + if (serd_model_add(model, uri(world, 92), uri(world, 6), hello_gb, graph)) { + FAIL("Failed to add literal with language\n"); + } + + // (14 6 "bonjour"@fr) and (14 6 "salut"@fr) + SerdNode* bonjour = serd_node_new_literal("bonjour", NULL, "fr"); + SerdNode* salut = serd_node_new_literal("salut", NULL, "fr"); + if (serd_model_add(model, uri(world, 14), uri(world, 6), bonjour, graph)) { + FAIL("Failed to add literal with language\n"); + } + if (serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)) { + FAIL("Failed to add literal with language\n"); + } + + // Attempt to add duplicates + if (!serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)) { + FAIL("Successfully added duplicate statement\n"); + } + + // Add a blank node subject + SerdNode* ablank = serd_node_new_blank("ablank"); + if (serd_model_add(model, ablank, uri(world, 6), salut, graph)) { + FAIL("Failed to add blank subject statement\n"); + } + + // Add statement with URI object + if (serd_model_add(model, ablank, uri(world, 6), uri(world, 7), graph)) { + FAIL("Failed to add URI subject statement\n"); + } + + return EXIT_SUCCESS; +} + +static int +test_read(SerdWorld* world, SerdModel* model, SerdNode* g, const size_t n_quads) +{ + Quad id; + + SerdIter* iter = serd_model_begin(model); + if (serd_iter_get_model(iter) != model) { + FAIL("Iterator has incorrect serd pointer\n"); + } + + for (; !serd_iter_end(iter); serd_iter_next(iter)) { + const SerdStatement* statement = serd_iter_get(iter); + if (!serd_statement_get_subject(statement) || + !serd_statement_get_predicate(statement) || + !serd_statement_get_object(statement)) { + FAIL("Iterator points to null node\n"); + } + } + + // Attempt to increment past end + if (!serd_iter_next(iter)) { + FAIL("Successfully incremented past end\n"); + } + + serd_iter_free(iter); + + const char* s = "hello"; + SerdNode* plain_hello = serd_node_new_literal(s, NULL, NULL); + SerdNode* type4_hello = serd_node_new_literal(s, uri(world, 4), NULL); + SerdNode* type5_hello = serd_node_new_literal(s, uri(world, 5), NULL); + SerdNode* gb_hello = serd_node_new_literal(s, NULL, "en-gb"); + SerdNode* us_hello = serd_node_new_literal(s, NULL, "en-us"); + +#define NUM_PATTERNS 18 + + QueryTest patterns[NUM_PATTERNS] = { + { { NULL, NULL, NULL }, (int)(n_quads * n_objects_per) + 12 }, + { { uri(world, 1), WILDCARD_NODE, WILDCARD_NODE }, 2 }, + { { uri(world, 9), uri(world, 9), uri(world, 9) }, 0 }, + { { uri(world, 1), uri(world, 2), uri(world, 4) }, 1 }, + { { uri(world, 3), uri(world, 4), WILDCARD_NODE }, 2 }, + { { WILDCARD_NODE, uri(world, 2), uri(world, 4) }, 1 }, + { { WILDCARD_NODE, WILDCARD_NODE, uri(world, 4) }, 1 }, + { { uri(world, 1), WILDCARD_NODE, WILDCARD_NODE }, 2 }, + { { uri(world, 1), WILDCARD_NODE, uri(world, 4) }, 1 }, + { { WILDCARD_NODE, uri(world, 2), WILDCARD_NODE }, 2 }, + { { uri(world, 98), uri(world, 4), plain_hello }, 1 }, + { { uri(world, 98), uri(world, 4), type5_hello }, 1 }, + { { uri(world, 96), uri(world, 4), type4_hello }, 1 }, + { { uri(world, 96), uri(world, 4), type5_hello }, 1 }, + { { uri(world, 94), uri(world, 5), plain_hello }, 1 }, + { { uri(world, 94), uri(world, 5), gb_hello }, 1 }, + { { uri(world, 92), uri(world, 6), gb_hello }, 1 }, + { { uri(world, 92), uri(world, 6), us_hello }, 1 } + }; + + Quad match = { uri(world, 1), uri(world, 2), uri(world, 4), g }; + if (!serd_model_ask(model, match[0], match[1], match[2], match[3])) { + FAILF("No match for " TUP_FMT "\n", TUP_FMT_ARGS(match)); + } + + Quad nomatch = { uri(world, 1), uri(world, 2), uri(world, 9), g }; + if (serd_model_ask(model, nomatch[0], nomatch[1], nomatch[2], nomatch[3])) { + FAILF("False match for " TUP_FMT "\n", TUP_FMT_ARGS(nomatch)); + } + + if (serd_model_get(model, NULL, NULL, uri(world, 3), g)) { + FAIL("Get *,*,3 succeeded\n"); + } else if (serd_model_get(model, uri(world, 1), uri(world, 99), NULL, g)) { + FAIL("Get 1,2,9 succeeded\n"); + } else if (!serd_node_equals( + serd_model_get( + model, uri(world, 1), uri(world, 2), NULL, g), + uri(world, 3))) { + FAIL("Get 1,2,* != 3\n"); + } else if (!serd_node_equals( + serd_model_get( + model, uri(world, 1), NULL, uri(world, 3), g), + uri(world, 2))) { + FAIL("Get 1,*,3 != 2\n"); + } else if (!serd_node_equals( + serd_model_get( + model, NULL, uri(world, 2), uri(world, 3), g), + uri(world, 1))) { + FAIL("Get *,2,3 != 1\n"); + } + + for (unsigned i = 0; i < NUM_PATTERNS; ++i) { + QueryTest test = patterns[i]; + Quad pat = { test.query[0], test.query[1], test.query[2], g }; + + iter = serd_model_find(model, pat[0], pat[1], pat[2], pat[3]); + int num_results = 0; + for (; !serd_iter_end(iter); serd_iter_next(iter)) { + ++num_results; + if (!serd_statement_matches( + serd_iter_get(iter), pat[0], pat[1], pat[2], pat[3])) { + serd_iter_free(iter); + FAILF("Query result " TUP_FMT " does not match pattern " TUP_FMT + "\n", + TUP_FMT_ARGS(id), + TUP_FMT_ARGS(pat)); + } + } + serd_iter_free(iter); + if (num_results != test.expected_num_results) { + FAILF("Expected %d results for " TUP_FMT ", got %d\n", + test.expected_num_results, + TUP_FMT_ARGS(pat), + num_results); + } + } + + // Query blank node subject + Quad pat = { serd_node_new_blank("ablank"), 0, 0 }; + int num_results = 0; + iter = serd_model_find(model, pat[0], pat[1], pat[2], pat[3]); + for (; !serd_iter_end(iter); serd_iter_next(iter)) { + ++num_results; + const SerdStatement* statement = serd_iter_get(iter); + if (!serd_statement_matches( + statement, pat[0], pat[1], pat[2], pat[3])) { + serd_iter_free(iter); + FAILF("Result for " TUP_FMT " does not match pattern\n", + TUP_FMT_ARGS(pat)); + } + } + serd_iter_free(iter); + + if (num_results != 2) { + FAIL("Blank node subject query failed\n"); + } + + // Test nested queries + const SerdNode* last_subject = 0; + iter = serd_model_find(model, NULL, NULL, NULL, NULL); + for (; !serd_iter_end(iter); serd_iter_next(iter)) { + const SerdStatement* statement = serd_iter_get(iter); + const SerdNode* subject = serd_statement_get_subject(statement); + if (subject == last_subject) { + continue; + } + + Quad subpat = { subject, 0, 0 }; + SerdIter* subiter = serd_model_find( + model, subpat[0], subpat[1], subpat[2], subpat[3]); + const SerdStatement* substatement = serd_iter_get(subiter); + uint64_t num_sub_results = 0; + if (serd_statement_get_subject(substatement) != subject) { + FAIL("Incorrect initial submatch\n"); + } + for (; !serd_iter_end(subiter); serd_iter_next(subiter)) { + if (!serd_statement_matches(serd_iter_get(subiter), + subpat[0], + subpat[1], + subpat[2], + subpat[3])) { + serd_iter_free(iter); + serd_iter_free(subiter); + FAIL("Nested query result does not match pattern\n"); + } + ++num_sub_results; + } + serd_iter_free(subiter); + if (num_sub_results != n_objects_per) { + FAILF("Nested query " TUP_FMT " failed" + " (%d results, expected %d)\n", + TUP_FMT_ARGS(subpat), + num_sub_results, + n_objects_per); + } + + uint64_t count = serd_model_count(model, subject, 0, 0, 0); + if (count != num_sub_results) { + FAILF("Query " TUP_FMT " count %d does not match result count %d\n", + TUP_FMT_ARGS(subpat), + count, + num_sub_results); + } + + last_subject = subject; + } + serd_iter_free(iter); + + return 0; +} + +static SerdStatus +unexpected_error(void* handle, const SerdError* error) +{ + fprintf(stderr, "error: "); + vfprintf(stderr, error->fmt, *error->args); + return SERD_SUCCESS; +} + +static SerdStatus +expected_error(void* handle, const SerdError* error) +{ + fprintf(stderr, "expected: "); + vfprintf(stderr, error->fmt, *error->args); + return SERD_SUCCESS; +} + +static int +test_free_null(SerdWorld* world, const size_t n_quads) +{ + serd_model_free(NULL); // Shouldn't crash + return 0; +} + +static int +test_get_world(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + if (serd_model_get_world(model) != world) { + FAIL("Model has incorrect world pointer\n"); + } + + serd_model_free(model); + return 0; +} + +static int +test_add_null(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + + serd_world_set_error_sink(world, expected_error, NULL); + if (!serd_model_add(model, 0, 0, 0, 0)) { + FAIL("Added NULL tuple\n"); + } else if (!serd_model_add(model, uri(world, 1), 0, 0, 0)) { + FAIL("Added tuple with NULL P and O\n"); + } else if (!serd_model_add(model, uri(world, 1), uri(world, 2), 0, 0)) { + FAIL("Added tuple with NULL O\n"); + } else if (serd_model_num_quads(model) != 0) { + FAIL("Model contains invalid statements\n"); + } + + serd_model_free(model); + return 0; +} + +static int +test_add_with_iterator(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + + serd_world_set_error_sink(world, expected_error, NULL); + if (serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0)) { + FAIL("Failed to add statement\n"); + } + + // Add a statement with an active iterator + SerdIter* iter = serd_model_begin(model); + if (serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 4), 0)) { + FAIL("Failed to add statement with active iterator\n"); + } + + // Check that iterator has been invalidated + if (serd_iter_get(iter)) { + FAIL("Successfully dereferenced invalidated iterator\n"); + } else if (!serd_iter_next(iter)) { + FAIL("Successfully incremented invalidated iterator\n"); + } + + serd_iter_free(iter); + serd_model_free(model); + return 0; +} + +static int +test_erase_with_iterator(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + + serd_world_set_error_sink(world, expected_error, NULL); + if (serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0) || + serd_model_add(model, uri(world, 4), uri(world, 5), uri(world, 6), 0)) { + FAIL("Failed to add statements\n"); + } + + // Erase a statement with an active iterator + SerdIter* iter1 = serd_model_begin(model); + SerdIter* iter2 = serd_model_begin(model); + if (serd_model_erase(model, iter1)) { + FAIL("Failed to erase statement\n"); + } + + // Check that erased iterator points to the next statement + if (!serd_statement_matches(serd_iter_get(iter1), + uri(world, 4), + uri(world, 5), + uri(world, 6), + 0)) { + FAIL("Erased iterator was not incremented\n"); + } + + // Check that other iterator has been invalidated + if (serd_iter_get(iter2)) { + FAIL("Successfully dereferenced invalidated iterator\n"); + } else if (!serd_iter_next(iter2)) { + FAIL("Successfully incremented invalidated iterator\n"); + } + + serd_iter_free(iter2); + serd_iter_free(iter1); + serd_model_free(model); + return 0; +} + +static int +test_add_erase(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, true); + + // Add (s p "hello") + SerdNode* s = uri(world, 1); + SerdNode* p = uri(world, 2); + SerdNode* hello = serd_node_new_literal("hello", NULL, NULL); + if (serd_model_add(model, s, p, hello, 0)) { + FAIL("Failed to add statement (s p \"hello\")\n"); + } else if (!serd_model_ask(model, s, p, hello, 0)) { + FAIL("Added statement (s p \"hello\") not found\n"); + } + + // Add (s p "hi") + SerdNode* hi = serd_node_new_literal("hi", NULL, NULL); + if (serd_model_add(model, s, p, hi, NULL)) { + FAIL("Failed to add statement (s p \"hi\")\n"); + } else if (!serd_model_ask(model, s, p, hi, 0)) { + FAIL("Added statement (s p \"hi\") not found\n"); + } + + // Erase (s p "hi") + SerdIter* iter = serd_model_find(model, s, p, hi, NULL); + if (serd_model_erase(model, iter)) { + FAIL("Failed to erase statement (s p \"hi\")\n"); + } else if (serd_model_num_quads(model) != 1) { + FAILF("Expected 1 node after erasure, not %zu\n", + serd_model_num_quads(model)); + } + serd_iter_free(iter); + + // Check that erased statement can not be found + iter = serd_model_find(model, s, p, hi, NULL); + if (!serd_iter_end(iter)) { + FAIL("Found removed statement (s p \"hi\")\n"); + } + serd_iter_free(iter); + + serd_model_free(model); + return 0; +} + +static int +test_erase_all(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + generate(world, model, n_quads, NULL); + + SerdIter* iter = serd_model_begin(model); + while (!serd_iter_end(iter)) { + const SerdStatus st = serd_model_erase(model, iter); + if (st) { + FAILF("Failed to erase tuple via iterator (%s)\n", + serd_strerror(st)); + } + } + + serd_iter_free(iter); + serd_model_free(model); + return 0; +} + +static int +test_find_past_end(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + SerdNode* s = uri(world, 1); + SerdNode* p = uri(world, 2); + SerdNode* o = uri(world, 3); + if (serd_model_add(model, s, p, o, 0)) { + FAIL("Failed to add statement\n"); + } else if (!serd_model_ask(model, s, p, o, 0)) { + FAIL("Added statement not found\n"); + } + + SerdNode* huge = uri(world, 999); + if (!serd_iter_end(serd_model_find(model, huge, huge, huge, 0))) { + FAIL("Found statement past end of model\n"); + } + + serd_model_free(model); + return 0; +} + +static int +test_triple_index_read(SerdWorld* world, const size_t n_quads) +{ + static const char* const index_names[6] = { "spo", "sop", "ops", + "osp", "pso", "pos" }; + + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = serd_model_new(world, (1 << i), false); + generate(world, model, n_quads, 0); + if (test_read(world, model, 0, n_quads)) { + FAILF("Failure reading index `%s'\n", index_names[i]); + } + serd_model_free(model); + } + + return 0; +} + +static int +test_quad_index_read(SerdWorld* world, const size_t n_quads) +{ + static const char* const index_names[6] = { "gspo", "gsop", "gops", + "gosp", "gpso", "gpos" }; + + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = serd_model_new(world, (1 << i), true); + SerdNode* graph = uri(world, 42); + generate(world, model, n_quads, graph); + if (test_read(world, model, graph, n_quads)) { + FAILF("Failure reading index `%s'\n", index_names[i]); + } + serd_model_free(model); + } + + return 0; +} + +static int +test_remove_graph(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, true); + + // Generate a couple of graphs + SerdNode* graph42 = uri(world, 42); + SerdNode* graph43 = uri(world, 43); + generate(world, model, 1, graph42); + generate(world, model, 1, graph43); + + // Remove one graph via iterator + SerdStatus st; + SerdIter* iter = serd_model_find(model, NULL, NULL, NULL, graph43); + while (!serd_iter_end(iter)) { + if ((st = serd_model_erase(model, iter))) { + FAILF("Remove by iterator failed (%s)\n", serd_strerror(st)); + } + } + serd_iter_free(iter); + + // Erase the first tuple (an element in the default graph) + iter = serd_model_begin(model); + if (serd_model_erase(model, iter)) { + FAIL("Failed to erase begin iterator on non-empty model\n"); + } + serd_iter_free(iter); + + // Ensure only the other graph is left + Quad pat = { 0, 0, 0, graph42 }; + for (iter = serd_model_begin(model); !serd_iter_end(iter); + serd_iter_next(iter)) { + if (!serd_statement_matches( + serd_iter_get(iter), pat[0], pat[1], pat[2], pat[3])) { + FAIL("Graph removal via iteration failed\n"); + } + } + serd_iter_free(iter); + + serd_model_free(model); + return 0; +} + +static int +test_default_graph(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, true); + SerdNode* s = uri(world, 1); + SerdNode* p = uri(world, 2); + SerdNode* o = uri(world, 3); + SerdNode* g1 = uri(world, 101); + SerdNode* g2 = uri(world, 102); + + // Insert the same statement into two graphs + if (serd_model_add(model, s, p, o, g1)) { + FAIL("Failed to insert statement into first graph\n"); + } + if (serd_model_add(model, s, p, o, g2)) { + FAIL("Failed to insert statement into second graph\n"); + } + + // Ensure we only see statement once in the default graph + if (serd_model_count(model, s, p, o, NULL) != 1) { + FAIL("Found duplicate triple in default graph\n"); + } + + serd_model_free(model); + return 0; +} + +int +main(int argc, char** argv) +{ + static const size_t n_quads = 300; + + serd_model_free(NULL); // Shouldn't crash + + typedef int (*TestFunc)(SerdWorld*, size_t); + + const TestFunc tests[] = { test_free_null, + test_get_world, + test_add_null, + test_add_with_iterator, + test_erase_with_iterator, + test_add_erase, + test_erase_all, + test_find_past_end, + test_triple_index_read, + test_quad_index_read, + test_remove_graph, + test_default_graph, + NULL }; + + SerdWorld* world = serd_world_new(); + for (const TestFunc* t = tests; *t; ++t) { + serd_world_set_error_sink(world, unexpected_error, NULL); + if ((*t)(world, n_quads)) { + return EXIT_FAILURE; + } + } + + serd_world_free(world); + return 0; +} diff --git a/wscript b/wscript index 88ee2e7b..660e63c0 100644 --- a/wscript +++ b/wscript @@ -34,6 +34,9 @@ def options(ctx): opt.add_option('--' + name, action='store_true', dest=name.replace('-', '_'), help=desc) + opt.add_option('--dump', type='string', default='', dest='dump', + help='dump debugging output (iter, search, write, all)') + def configure(conf): autowaf.display_header('Serd Configuration') conf.load('compiler_c', cache=True) @@ -64,6 +67,14 @@ def configure(conf): defines = ['_POSIX_C_SOURCE=200809L'], mandatory = False) + dump = Options.options.dump.split(',') + if 'all' in dump or 'iter' in dump: + conf.define('SERD_DEBUG_ITER', 1) + if 'all' in dump or 'search' in dump: + conf.define('SERD_DEBUG_SEARCH', 1) + if 'all' in dump or 'write' in dump: + conf.define('SERD_DEBUG_WRITE', 1) + autowaf.define(conf, 'SERD_VERSION', SERD_VERSION) autowaf.set_lib_env(conf, 'serd', SERD_VERSION) conf.write_config_header('serd_config.h', remove=False) @@ -78,14 +89,22 @@ def configure(conf): lib_source = ['src/base64.c', 'src/byte_source.c', 'src/env.c', + 'src/inserter.c', + 'src/iter.c', + 'src/model.c', 'src/n3.c', 'src/node.c', + 'src/nodes.c', 'src/reader.c', + 'src/statement.c', 'src/string.c', 'src/system.c', 'src/uri.c', 'src/world.c', - 'src/writer.c'] + 'src/writer.c', + 'src/zix/btree.c', + 'src/zix/digest.c', + 'src/zix/hash.c'] def build(bld): # C Headers @@ -144,7 +163,8 @@ def build(bld): # Test programs for prog in [('serdi_static', 'src/serdi.c'), ('serd_test', 'tests/serd_test.c'), - ('read_chunk_test', 'tests/read_chunk_test.c')]: + ('read_chunk_test', 'tests/read_chunk_test.c'), + ('model_test', 'tests/model_test.c')]: bld(features = 'c cprogram', source = prog[1], use = 'libserd_profiled', @@ -275,9 +295,17 @@ def show_diff(from_lines, to_lines, from_filename, to_filename): from_lines, to_lines, fromfile=from_filename, tofile=to_filename): sys.stderr.write(line) -def check_output(out_filename, check_filename, subst_from='', subst_to=''): +def check_output(out_filename, check_filename, subst_from='', subst_to='', sort=False): if not os.access(out_filename, os.F_OK): Logs.pprint('RED', 'FAIL: output %s is missing' % out_filename) + elif sort: + out_lines = sorted(set(io.open(out_filename, encoding='utf-8').readlines())) + check_lines = sorted(set(io.open(check_filename, encoding='utf-8').readlines())) + if out_lines != check_lines: + show_diff(check_lines, out_lines, check_filename, out_filename) + return False + else: + return True elif not file_equals(check_filename, out_filename, subst_from, subst_to): Logs.pprint('RED', 'FAIL: %s != %s' % (os.path.abspath(out_filename), check_filename)) @@ -389,14 +417,17 @@ def test_suite(ctx, base_uri, testdir, report, isyntax, osyntax, options=''): # Check output against test suite check_uri = model[test][mf + 'result'][0] check_path = file_uri_to_path(check_uri) + sort = '-m' in options result = autowaf.run_test( ctx, APPNAME, - lambda: check_output(action + '.out', check_path), + lambda: check_output(action + '.out', check_path, sort=sort), True, name=str(action) + ' check', quiet=True) # Run round-trip tests - test_thru(ctx, uri, action, check_path, - ' '.join(next(thru_options_iter)), isyntax, osyntax, options, quiet=True) + if not sort: + test_thru(ctx, uri, action, check_path, + ' '.join(next(thru_options_iter)), + isyntax, osyntax, options, quiet=True) # Write test report entry if report is not None: @@ -434,7 +465,7 @@ def test(ctx): os.environ['PATH'] = '.' + os.pathsep + os.getenv('PATH') autowaf.pre_test(ctx, APPNAME) - for test in ['serd_test', 'read_chunk_test']: + for test in ['serd_test', 'read_chunk_test', 'model_test']: autowaf.run_test(ctx, APPNAME, test, dirs=['.']) def test_ttl(in_name, expected_name): @@ -493,27 +524,28 @@ def test(ctx): 'serdi_static "file://%s/tests/good/manifest.ttl" > /dev/full' % srcdir, 1, name='write_error') - # Serd-specific test cases - serd_base = 'http://drobilla.net/sw/serd/tests/' - test_suite(ctx, serd_base + 'good/', 'good', None, 'Turtle', 'NTriples') - test_suite(ctx, serd_base + 'bad/', 'bad', None, 'Turtle', 'NTriples') - - # Standard test suites - with open('earl.ttl', 'w') as report: - report.write('@prefix earl: .\n' - '@prefix dc: .\n') - - with open(os.path.join(srcdir, 'serd.ttl')) as serd_ttl: - for line in serd_ttl: - report.write(line) - - w3c_base = 'http://www.w3.org/2013/' - test_suite(ctx, w3c_base + 'NTriplesTests/', - 'NTriplesTests', report, 'NTriples', 'NTriples') - test_suite(ctx, w3c_base + 'NQuadsTests/', - 'NQuadsTests', report, 'NQuads', 'NQuads') - test_suite(ctx, w3c_base + 'TriGTests/', - 'TriGTests', report, 'TriG', 'NQuads', '-a') + for opts in ['', '-m']: + # Serd-specific test cases + serd_base = 'http://drobilla.net/sw/serd/tests/' + test_suite(ctx, serd_base + 'good/', 'good', None, 'Turtle', 'NTriples', opts) + test_suite(ctx, serd_base + 'bad/', 'bad', None, 'Turtle', 'NTriples', opts) + + # Standard test suites + with open('earl.ttl', 'w') as report: + report.write('@prefix earl: .\n' + '@prefix dc: .\n') + + with open(os.path.join(srcdir, 'serd.ttl')) as serd_ttl: + for line in serd_ttl: + report.write(line) + + w3c_base = 'http://www.w3.org/2013/' + test_suite(ctx, w3c_base + 'NTriplesTests/', + 'NTriplesTests', report, 'NTriples', 'NTriples', opts) + test_suite(ctx, w3c_base + 'NQuadsTests/', + 'NQuadsTests', report, 'NQuads', 'NQuads', opts) + test_suite(ctx, w3c_base + 'TriGTests/', + 'TriGTests', report, 'TriG', 'NQuads', '-a' + opts) autowaf.post_test(ctx, APPNAME) if ctx.autowaf_tests[APPNAME]['failed'] > 0: -- cgit v1.2.1