From 0342270f81dc9c676a92422c4e73484fb44f6da8 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sat, 12 May 2018 13:28:47 +0200 Subject: WIP: Add model --- doc/serdi.1 | 7 + serd/serd.h | 378 +++++++++++++++++++++++++++ src/inserter.c | 115 +++++++++ src/iter.c | 239 +++++++++++++++++ src/iter.h | 88 +++++++ src/log.h | 60 +++++ src/model.c | 649 ++++++++++++++++++++++++++++++++++++++++++++++ src/model.h | 43 ++++ src/node.c | 7 + src/node.h | 1 + src/range.c | 250 ++++++++++++++++++ src/range.h | 36 +++ src/serdi.c | 41 ++- src/statement.c | 32 +++ src/statement.h | 12 + src/string.c | 1 + src/zix/btree.c | 40 ++- src/zix/btree.h | 21 ++ tests/model_test.c | 742 +++++++++++++++++++++++++++++++++++++++++++++++++++++ wscript | 44 +++- 20 files changed, 2796 insertions(+), 10 deletions(-) create mode 100644 src/inserter.c create mode 100644 src/iter.c create mode 100644 src/iter.h create mode 100644 src/log.h create mode 100644 src/model.c create mode 100644 src/model.h create mode 100644 src/range.c create mode 100644 src/range.h create mode 100644 tests/model_test.c diff --git a/doc/serdi.1 b/doc/serdi.1 index 66d845c6..4b3c9c39 100644 --- a/doc/serdi.1 +++ b/doc/serdi.1 @@ -28,6 +28,13 @@ generated immediately as input arrives, rather than waiting until an entire page of input has arrived. With this option serdi uses one page less memory, but will likely be significantly slower. +.TP +\fB\-f\fR +Fast serialisation without object inlining. This affects only model +(non-streaming) serialisation, and disables searching and statement reordering +for pretty printing. Statements will be written in simple sorted order, which +is faster, but may result in less readable output in Turtle or TriG. + .TP \fB\-h\fR Print the command line options. diff --git a/serd/serd.h b/serd/serd.h index 0f883af9..31ea1a22 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -67,6 +67,18 @@ typedef struct SerdCursorImpl SerdCursor; /// Lexical environment for relative URIs or CURIEs (base URI and namespaces) typedef struct SerdEnvImpl SerdEnv; +/// An indexed set of statements +typedef struct SerdModelImpl SerdModel; + +/// A statement sink that inserts into a model +typedef struct SerdInserterImpl SerdInserter; + +/// Model Iterator +typedef struct SerdIterImpl SerdIter; + +/// Model Range +typedef struct SerdRangeImpl SerdRange; + /// Streaming parser that reads a text stream and writes to a statement sink typedef struct SerdReaderImpl SerdReader; @@ -86,6 +98,7 @@ typedef enum { SERD_ERR_UNKNOWN, ///< Unknown error SERD_ERR_BAD_SYNTAX, ///< Invalid syntax SERD_ERR_BAD_ARG, ///< Invalid argument + SERD_ERR_BAD_ITER, ///< Use of invalidated iterator SERD_ERR_NOT_FOUND, ///< Not found SERD_ERR_ID_CLASH, ///< Encountered clashing blank node IDs SERD_ERR_BAD_CURIE, ///< Invalid CURIE (e.g. prefix does not exist) @@ -113,6 +126,14 @@ typedef enum { /// Bitwise OR of SerdStatementFlag values typedef uint32_t SerdStatementFlags; +/// Flags that control style for a model serialisation +typedef enum { + SERD_NO_INLINE_OBJECTS = 1 << 0, /**< Do not inline objects where possible */ +} SerdSerialisationFlag; + +/// Bitwise OR of SerdStatementFlag values +typedef uint32_t SerdSerialisationFlags; + /** Type of a syntactic RDF node @@ -184,6 +205,20 @@ typedef enum { SERD_GRAPH = 3 ///< Graph ("context") } SerdField; +/// Indexing option +typedef enum { + SERD_INDEX_SPO = 1, ///< Subject, Predicate, Object + SERD_INDEX_SOP = 1 << 1, ///< Subject, Object, Predicate + SERD_INDEX_OPS = 1 << 2, ///< Object, Predicate, Subject + SERD_INDEX_OSP = 1 << 3, ///< Object, Subject, Predicate + SERD_INDEX_PSO = 1 << 4, ///< Predicate, Subject, Object + SERD_INDEX_POS = 1 << 5, ///< Predicate, Object, Subject + SERD_INDEX_GRAPHS = 1 << 6 ///< Support multiple graphs in model +} SerdModelFlag; + +/// Bitwise OR of SerdModelFlag values +typedef uint32_t SerdModelFlags; + /// A syntactic RDF node typedef struct SerdNodeImpl SerdNode; @@ -1183,6 +1218,224 @@ SERD_API void serd_nodes_deref(SerdNodes* nodes, const SerdNode* node); +/** + @} + @name Model + @{ +*/ + +/** + Create a new model + + @param world The world in which to make this model. + + @param flags Model options, including enabled indices, for example `SERD_SPO | + SERD_OPS`. Be sure to enable an index where the most significant node(s) + are not variables in your queries. For example, to make (? P O) queries, + enable either SERD_OPS or SERD_POS. +*/ +SERD_API +SerdModel* +serd_model_new(SerdWorld* world, SerdModelFlags flags); + +/// Return a deep copy of `model` +SERD_API +SerdModel* +serd_model_copy(const SerdModel* model); + +/// Return true iff `a` is equal to `b`, ignoring statement cursor metadata +SERD_API +bool +serd_model_equals(const SerdModel* a, const SerdModel* b); + +/// Close and free `model` +SERD_API +void +serd_model_free(SerdModel* model); + +/// Get the world associated with `model` +SERD_API +SerdWorld* +serd_model_get_world(SerdModel* model); + +/// Return the number of statements stored in `model` +SERD_API +size_t +serd_model_size(const SerdModel* model); + +/// Return true iff there are no statements stored in `model` +SERD_API +bool +serd_model_empty(const SerdModel* model); + +/// Return an iterator to the start of `model` +SERD_API +SerdIter* +serd_model_begin(const SerdModel* model); + +/// Return an iterator to the end of `model` +SERD_API +const SerdIter* +serd_model_end(const SerdModel* model); + +/// Return a range of all statements in `model` +SERD_API +SerdRange* +serd_model_all(const SerdModel* model); + +/** + Search for statements by a quad pattern + + @return An iterator to the first match, or NULL if no matches found. +*/ +SERD_API +SerdIter* +serd_model_find(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Search for statements by a quad pattern + + @return A range containins all matching statements. +*/ +SERD_API +SerdRange* +serd_model_range(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Search for a single node that matches a pattern + + Exactly one of `s`, `p`, `o` must be NULL. + This function is mainly useful for predicates that only have one value. + @return The first matching node, or NULL if no matches are found. +*/ +SERD_API +const SerdNode* +serd_model_get(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Search for a single statement that matches a pattern + + This function is mainly useful for predicates that only have one value. + + @return The first matching statement, or NULL if none are found. +*/ +SERD_API +const SerdStatement* +serd_model_get_statement(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/// Return true iff a statement exists +SERD_API +bool +serd_model_ask(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/// Return the number of matching statements +SERD_API +size_t +serd_model_count(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Add a statement to a model from nodes + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_add(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Add a statement to a model + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_insert(SerdModel* model, const SerdStatement* statement); + +/** + Add a range of statements to a model + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_add_range(SerdModel* model, SerdRange* range); + +/** + Remove a quad from a model via an iterator + + Calling this function invalidates all iterators on `model` except `iter`. + + @param model The model which `iter` points to. + @param iter Iterator to the element to erase, which is incremented to the + next value on return. +*/ +SERD_API +SerdStatus +serd_model_erase(SerdModel* model, SerdIter* iter); + +/** + Remove a range from a model + + Calling this function invalidates all iterators on `model` except `iter`. + + @param model The model which `range` points to. + @param range Range to erase, which will be empty on return; +*/ +SERD_API +SerdStatus +serd_model_erase_range(SerdModel* model, SerdRange* range); + +/** + @} + @name Inserter + @{ +*/ + +/// Create an inserter for writing statements to a model +SERD_API +SerdInserter* +serd_inserter_new(SerdModel* model, + SerdEnv* env, + const SerdNode* default_graph); + +/// Free an inserter +SERD_API +void +serd_inserter_free(SerdInserter* inserter); + +/// Return a sink interface that adds statements via `inserter` +SERD_API +SerdSink* +serd_inserter_get_sink(SerdInserter* inserter); + /** @} @name Statement @@ -1219,6 +1472,131 @@ SERD_API const SerdCursor* serd_statement_get_cursor(const SerdStatement* statement); +/** + Return true iff `a` is equal to `b`, ignoring statement cursor metadata + + Only returns true if nodes are equivalent, does not perform wildcard matching. +*/ +SERD_API +bool +serd_statement_equals(const SerdStatement* a, const SerdStatement* b); + +/** + Return true iff `statement` matches the given pattern + + The matching rules are the same used for querying: nodes match if they are + equivalent, and NULL acts as a wildcard that matches any node. +*/ +SERD_API +bool +serd_statement_matches(const SerdStatement* statement, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph); + +/** + @} + @name Iterator + @{ +*/ + +/// Return a new copy of `iter` +SERD_API +SerdIter* +serd_iter_copy(const SerdIter* iter); + +/// Return the statement pointed to by `iter` +SERD_API +const SerdStatement* +serd_iter_get(const SerdIter* iter); + +/** + Increment `iter` to point to the next statement + + @return True iff `iter` has reached the end. +*/ +SERD_API +bool +serd_iter_next(SerdIter* iter); + +/// Return true iff `lhs` is equal to `rhs` +SERD_API +bool +serd_iter_equals(const SerdIter* lhs, const SerdIter* rhs); + +/// Free `iter` +SERD_API +void +serd_iter_free(SerdIter* iter); + +/** + @} + @name Range + @{ +*/ + +/// Return a new copy of `range` +SERD_API +SerdRange* +serd_range_copy(const SerdRange* range); + +/// Free `range` +SERD_API +void +serd_range_free(SerdRange* range); + +/// Return the first statement in `range`, or NULL if `range` is empty +SERD_API +const SerdStatement* +serd_range_front(const SerdRange* range); + +/// Return true iff `lhs` is equal to `rhs` +SERD_API +bool +serd_range_equals(const SerdRange* lhs, const SerdRange* rhs); + +/// Increment the start of `range` to point to the next statement +SERD_API +bool +serd_range_next(SerdRange* range); + +/// Return the number of statements in `range` +SERD_API +size_t +serd_range_size(const SerdRange* range); + +/// Return true iff there are no statements in `range` +SERD_API +bool +serd_range_empty(const SerdRange* range); + +/// Return an iterator to the start of `range` +SERD_API +const SerdIter* +serd_range_begin(const SerdRange* range); + +/// Return an iterator to the end of `range` +SERD_API +const SerdIter* +serd_range_end(const SerdRange* range); + +/** + Write `range` to `sink` + + The serialisation style can be controlled with `flags`. The default is to + write statements in an order suited for pretty-printing with Turtle or TriG + with as many objects written inline as possible. If + `SERD_NO_INLINE_OBJECTS` is given, a simple sorted stream is written + instead, which is significantly faster since no searching is required, but + can result in ugly output for Turtle or Trig. +*/ +SERD_API +SerdStatus +serd_range_serialise(const SerdRange* range, + SerdSink* sink, + SerdSerialisationFlags flags); + /** @} @name Cursor diff --git a/src/inserter.c b/src/inserter.c new file mode 100644 index 00000000..720ce0dd --- /dev/null +++ b/src/inserter.c @@ -0,0 +1,115 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "model.h" +#include "sink.h" +#include "statement.h" +#include "world.h" + +#include "serd/serd.h" + +#include + +struct SerdInserterImpl { + SerdModel* model; + SerdNode* default_graph; + SerdSink iface; +}; + +static SerdStatus +serd_inserter_on_base(SerdInserter* inserter, const SerdNode* uri) +{ + return serd_env_set_base_uri(inserter->iface.env, uri); +} + +static SerdStatus +serd_inserter_on_prefix(SerdInserter* inserter, + const SerdNode* name, + const SerdNode* uri) +{ + return serd_env_set_prefix(inserter->iface.env, name, uri); +} + +static const SerdNode* +manage_or_intern(SerdNodes* nodes, SerdNode* manage, const SerdNode* intern) +{ + return manage ? serd_nodes_manage(nodes, manage) + : serd_nodes_intern(nodes, intern); +} + +static SerdStatus +serd_inserter_on_statement(SerdInserter* inserter, + const SerdStatementFlags flags, + const SerdStatement* statement) +{ + (void)flags; + + const SerdNode* const subject = serd_statement_get_subject(statement); + const SerdNode* const predicate = serd_statement_get_predicate(statement); + const SerdNode* const object = serd_statement_get_object(statement); + const SerdNode* const graph = serd_statement_get_graph(statement); + + // Attempt to expand all nodes to eliminate CURIEs + SerdNode* const s = serd_env_expand(inserter->iface.env, subject); + SerdNode* const p = serd_env_expand(inserter->iface.env, predicate); + SerdNode* const o = serd_env_expand(inserter->iface.env, object); + SerdNode* const g = serd_env_expand(inserter->iface.env, graph); + + SerdNodes* const nodes = inserter->model->world->nodes; + const SerdNode* model_graph = manage_or_intern(nodes, g, graph); + if (!model_graph) { + model_graph = serd_nodes_intern(nodes, inserter->default_graph); + } + + const SerdStatus st = + serd_model_add_internal(inserter->model, + statement->cursor, + manage_or_intern(nodes, s, subject), + manage_or_intern(nodes, p, predicate), + manage_or_intern(nodes, o, object), + model_graph); + + return st > SERD_FAILURE ? st : SERD_SUCCESS; +} + +SerdInserter* +serd_inserter_new(SerdModel* model, SerdEnv* env, const SerdNode* default_graph) +{ + SerdInserter* inserter = (SerdInserter*)calloc(1, sizeof(SerdInserter)); + inserter->model = model; + inserter->default_graph = serd_node_copy(default_graph); + inserter->iface.handle = inserter; + inserter->iface.env = env; + inserter->iface.base = (SerdBaseSink)serd_inserter_on_base; + inserter->iface.prefix = (SerdPrefixSink)serd_inserter_on_prefix; + inserter->iface.statement = (SerdStatementSink)serd_inserter_on_statement; + return inserter; +} + +void +serd_inserter_free(SerdInserter* inserter) +{ + if (inserter) { + serd_node_free(inserter->default_graph); + free(inserter); + } +} + +SerdSink* +serd_inserter_get_sink(SerdInserter* inserter) +{ + return &inserter->iface; +} diff --git a/src/iter.c b/src/iter.c new file mode 100644 index 00000000..eaa6de2c --- /dev/null +++ b/src/iter.c @@ -0,0 +1,239 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "iter.h" + +#include "log.h" +#include "model.h" +#include "statement.h" +#include "world.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include +#include +#include + +static bool +serd_iter_pattern_matches(const SerdIter* iter, const SerdQuad pat) +{ + const SerdStatement* key = (const SerdStatement*)zix_btree_get(iter->cur); + for (int i = 0; i < iter->n_prefix; ++i) { + const int idx = orderings[iter->order][i]; + if (!serd_node_pattern_match(key->nodes[idx], pat[idx])) { + return false; + } + } + + return true; +} + +/** + Seek forward as necessary until `iter` points at a matching statement. + + @return true iff iterator reached end of valid range. +*/ +static bool +serd_iter_seek_match(SerdIter* iter, const SerdQuad pat) +{ + for (; !zix_btree_iter_is_end(iter->cur); + zix_btree_iter_increment(iter->cur)) { + const SerdStatement* s = (const SerdStatement*)zix_btree_get(iter->cur); + if (serd_statement_matches_quad(s, pat)) { + return false; // Found match + } else if (iter->mode == FILTER_RANGE && + !serd_iter_pattern_matches(iter, pat)) { + zix_btree_iter_free(iter->cur); + iter->cur = NULL; + return true; // Reached end of range + } + } + + assert(zix_btree_iter_is_end(iter->cur)); + return true; // Reached end of index +} + +static bool +check_version(const SerdIter* const iter) +{ + if (iter->version != iter->model->version) { + serd_world_errorf(iter->model->world, + SERD_ERR_BAD_ITER, + "attempt to use invalidated iterator\n"); + return false; + } + return true; +} + +SerdIter* +serd_iter_new(const SerdModel* model, + ZixBTreeIter* cur, + const SerdQuad pat, + SerdOrder order, + SearchMode mode, + int n_prefix) +{ + SerdIter* iter = (SerdIter*)calloc(1, sizeof(SerdIter)); + iter->model = model; + iter->cur = cur; + iter->order = order; + iter->mode = mode; + iter->n_prefix = n_prefix; + iter->version = model->version; + + switch (iter->mode) { + case ALL: + case RANGE: + assert(zix_btree_iter_is_end(iter->cur) || + serd_statement_matches_quad( + ((const SerdStatement*)zix_btree_get(iter->cur)), pat)); + break; + case FILTER_RANGE: + case FILTER_ALL: + serd_iter_seek_match(iter, pat); + break; + } + + // Replace (possibly temporary) nodes in pattern with nodes from the model + if (!zix_btree_iter_is_end(iter->cur)) { + const SerdStatement* s = (const SerdStatement*)zix_btree_get(iter->cur); + for (int i = 0; i < TUP_LEN; ++i) { + if (pat[i]) { + iter->pat[i] = s->nodes[i]; + } + } + } + +#ifdef SERD_DEBUG_ITER + if (!zix_btree_iter_is_end(iter->cur)) { + const SerdStatement* statement = serd_iter_get(iter); + SERD_ITER_LOG("New %p pat=" TUP_FMT " realpat=" TUP_FMT + " order=%d prefix=%d mode=%d cur=" TUP_FMT "\n", + (void*)iter, + TUP_FMT_ARGS(pat), + TUP_FMT_ARGS(iter->pat), + iter->order, + iter->n_prefix, + iter->mode, + TUP_FMT_ARGS(statement->nodes)); + } else { + SERD_ITER_LOG("New %p pat=" TUP_FMT " mode=%d at end\n", + (void*)iter, + TUP_FMT_ARGS(pat), + iter->mode); + } +#endif + + return iter; +} + +SerdIter* +serd_iter_copy(const SerdIter* iter) +{ + if (!iter) { + return NULL; + } + + SerdIter* copy = (SerdIter*)malloc(sizeof(SerdIter)); + memcpy(copy, iter, sizeof(SerdIter)); + copy->cur = zix_btree_iter_copy(iter->cur); + return copy; +} + +const SerdStatement* +serd_iter_get(const SerdIter* iter) +{ + return check_version(iter) ? (const SerdStatement*)zix_btree_get(iter->cur) + : NULL; +} + +bool +serd_iter_scan_next(SerdIter* iter) +{ + if (zix_btree_iter_is_end(iter->cur)) { + return true; + } + + switch (iter->mode) { + case ALL: + // At the end if the cursor is (assigned above) + break; + case RANGE: + SERD_ITER_LOG("%p range next\n", (void*)iter); + // At the end if the MSNs no longer match + if (!serd_iter_pattern_matches(iter, iter->pat)) { + zix_btree_iter_free(iter->cur); + iter->cur = NULL; + SERD_ITER_LOG("%p reached non-match end\n", (void*)iter); + } + break; + case FILTER_RANGE: + case FILTER_ALL: + // Seek forward to next match + serd_iter_seek_match(iter, iter->pat); + break; + } + + if (zix_btree_iter_is_end(iter->cur)) { + SERD_ITER_LOG("%p reached end\n", (void*)iter); + return true; + } else { +#ifdef SERD_DEBUG_ITER + SERD_ITER_LOG("%p reached " TUP_FMT "\n", + (void*)iter, + TUP_FMT_ARGS(serd_iter_get(iter)->nodes)); +#endif + return false; + } +} + +bool +serd_iter_next(SerdIter* iter) +{ + if (zix_btree_iter_is_end(iter->cur) || !check_version(iter)) { + return true; + } + + zix_btree_iter_increment(iter->cur); + return serd_iter_scan_next(iter); +} + +bool +serd_iter_equals(const SerdIter* lhs, const SerdIter* rhs) +{ + if (!lhs && !rhs) { + return true; + } + + return (lhs && rhs && lhs->model == rhs->model && + zix_btree_iter_equals(lhs->cur, rhs->cur) && + serd_node_pattern_match(lhs->pat[0], rhs->pat[0]) && + serd_node_pattern_match(lhs->pat[1], rhs->pat[1]) && + serd_node_pattern_match(lhs->pat[2], rhs->pat[2]) && + serd_node_pattern_match(lhs->pat[3], rhs->pat[3]) && + lhs->order == rhs->order && lhs->mode == rhs->mode && + lhs->n_prefix == rhs->n_prefix); +} + +void +serd_iter_free(SerdIter* iter) +{ + if (iter) { + zix_btree_iter_free(iter->cur); + free(iter); + } +} diff --git a/src/iter.h b/src/iter.h new file mode 100644 index 00000000..ac3e4b16 --- /dev/null +++ b/src/iter.h @@ -0,0 +1,88 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_ITER_H +#define SERD_ITER_H + +#include "statement.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include + +/** Triple ordering */ +typedef enum { + SPO, ///< Subject, Predicate, Object + SOP, ///< Subject, Object, Predicate + OPS, ///< Object, Predicate, Subject + OSP, ///< Object, Subject, Predicate + PSO, ///< Predicate, Subject, Object + POS, ///< Predicate, Object, Subject + GSPO, ///< Graph, Subject, Predicate, Object + GSOP, ///< Graph, Subject, Object, Predicate + GOPS, ///< Graph, Object, Predicate, Subject + GOSP, ///< Graph, Object, Subject, Predicate + GPSO, ///< Graph, Predicate, Subject, Object + GPOS ///< Graph, Predicate, Object, Subject +} SerdOrder; + +/** Mode for searching or iteration */ +typedef enum { + ALL, ///< Iterate over entire store + RANGE, ///< Iterate over range with equal prefix + FILTER_RANGE, ///< Iterate over range with equal prefix, filtering + FILTER_ALL ///< Iterate to end of store, filtering +} SearchMode; + +struct SerdIterImpl { + const SerdModel* model; ///< Model being iterated over + ZixBTreeIter* cur; ///< Current DB cursor + SerdQuad pat; ///< Pattern (in ordering order) + SerdOrder order; ///< Store order (which index) + SearchMode mode; ///< Iteration mode + int n_prefix; ///< Prefix for RANGE and FILTER_RANGE + uint64_t version; ///< Model version when iterator was created +}; + +#define NUM_ORDERS 12 +#define TUP_LEN 4 + +/** + Quads of indices for each order, from most to least significant + (array indexed by SordOrder) +*/ +static const int orderings[NUM_ORDERS][TUP_LEN] = { + { 0, 1, 2, 3 }, { 0, 2, 1, 3 }, // SPO, SOP + { 2, 1, 0, 3 }, { 2, 0, 1, 3 }, // OPS, OSP + { 1, 0, 2, 3 }, { 1, 2, 0, 3 }, // PSO, POS + { 3, 0, 1, 2 }, { 3, 0, 2, 1 }, // GSPO, GSOP + { 3, 2, 1, 0 }, { 3, 2, 0, 1 }, // GOPS, GOSP + { 3, 1, 0, 2 }, { 3, 1, 2, 0 } // GPSO, GPOS +}; + +SerdIter* serd_iter_new(const SerdModel* model, + ZixBTreeIter* cur, + const SerdQuad pat, + SerdOrder order, + SearchMode mode, + int n_prefix); + +bool serd_iter_scan_next(SerdIter* iter); + +bool serd_quad_match(const SerdQuad x, const SerdQuad y); + +#endif // SERD_ITER_H diff --git a/src/log.h b/src/log.h new file mode 100644 index 00000000..c60cb326 --- /dev/null +++ b/src/log.h @@ -0,0 +1,60 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_LOG_H +#define SERD_LOG_H + +#ifdef SERD_DEBUG_SEARCH +/** String name of each ordering (array indexed by SordOrder) */ +static const char* const order_names[] = { + "spo", "sop", "ops", "osp", "pso", "pos", + "gspo", "gsop", "gops", "gosp", "gpso", "gpos" +}; +#endif + +#define TUP_FMT "(%s %s %s %s)" +#define TUP_FMT_ELEM(e) ((e) ? serd_node_get_string(e) : "*") +#define TUP_FMT_ARGS(t) \ + TUP_FMT_ELEM((t)[0]), \ + TUP_FMT_ELEM((t)[1]), \ + TUP_FMT_ELEM((t)[2]), \ + TUP_FMT_ELEM((t)[3]) + +#define STATEMENT_FMT_ARGS(s) \ + TUP_FMT_ELEM(serd_statement_get_subject(s)), \ + TUP_FMT_ELEM(serd_statement_get_predicate(s)), \ + TUP_FMT_ELEM(serd_statement_get_object(s)), \ + TUP_FMT_ELEM(serd_statement_get_graph(s)) + +#define SERD_LOG(prefix, ...) fprintf(stderr, "serd." prefix ": " __VA_ARGS__) + +#ifdef SERD_DEBUG_ITER +# define SERD_ITER_LOG(...) SERD_LOG("iter", __VA_ARGS__) +#else +# define SERD_ITER_LOG(...) +#endif +#ifdef SERD_DEBUG_SEARCH +# define SERD_FIND_LOG(...) SERD_LOG("search", __VA_ARGS__) +#else +# define SERD_FIND_LOG(...) +#endif +#ifdef SERD_DEBUG_WRITE +# define SERD_WRITE_LOG(...) SERD_LOG("write", __VA_ARGS__) +#else +# define SERD_WRITE_LOG(...) +#endif + +#endif // SERD_LOG_H diff --git a/src/model.c b/src/model.c new file mode 100644 index 00000000..2e0b824e --- /dev/null +++ b/src/model.c @@ -0,0 +1,649 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "model.h" + +#include "cursor.h" +#include "iter.h" +#include "log.h" +#include "node.h" +#include "range.h" +#include "statement.h" +#include "world.h" + +#include "zix/btree.h" + +#include +#include +#include +#include + +#define DEFAULT_ORDER SPO +#define DEFAULT_GRAPH_ORDER GSPO + +/** + Compare quads lexicographically, ignoring graph. + + NULL IDs (equal to 0) are treated as wildcards, always less than every + other possible ID, except itself. +*/ +static int +serd_triple_compare(const void* x, const void* y, void* user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (int i = 0; i < TUP_LEN; ++i) { + const int o = ordering[i]; + if (o != SERD_GRAPH) { + const int cmp = + serd_node_wildcard_compare(s->nodes[o], t->nodes[o]); + if (cmp) { + return cmp; + } + } + } + + return 0; +} + +/** + Compare quads lexicographically, with exact (non-wildcard) graph matching. +*/ +static int +serd_quad_compare(const void* x, const void* y, void* user_data) +{ + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + // Compare graph without wildcard matching + const int cmp = serd_node_compare( + s->nodes[SERD_GRAPH], t->nodes[SERD_GRAPH]); + + return cmp ? cmp : serd_triple_compare(x, y, user_data); +} + +/** + Return true iff `serd` has an index for `order`. + If `graphs` is true, `order` will be modified to be the + corresponding order with a G prepended (so G will be the MSN). +*/ +static inline bool +serd_model_has_index(const SerdModel* model, + SerdOrder* order, + int* n_prefix, + bool graphs) +{ + if (graphs) { + *order = (SerdOrder)(*order + GSPO); + *n_prefix += 1; + } + + return model->indices[*order]; +} + +/** + Return the best available index for a pattern. + @param pat Pattern in standard (S P O G) order + @param mode Set to the (best) iteration mode for iterating over results + @param n_prefix Set to the length of the range prefix + (for `mode` == RANGE and `mode` == FILTER_RANGE) +*/ +static SerdOrder +serd_model_best_index(const SerdModel* model, + const SerdQuad pat, + SearchMode* mode, + int* n_prefix) +{ + const bool graph_search = (pat[SERD_GRAPH] != 0); + + const unsigned sig = ((pat[0] ? 1 : 0) * 0x100 + + (pat[1] ? 1 : 0) * 0x010 + + (pat[2] ? 1 : 0) * 0x001); + + SerdOrder good[2] = { (SerdOrder)-1, (SerdOrder)-1 }; + +#define PAT_CASE(sig, m, g0, g1, np) \ + case sig: \ + *mode = m; \ + good[0] = g0; \ + good[1] = g1; \ + *n_prefix = np; \ + break + + // Good orderings that don't require filtering + *mode = RANGE; + *n_prefix = 0; + switch (sig) { + case 0x000: + assert(graph_search); + *mode = RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + case 0x111: + *mode = RANGE; + *n_prefix = graph_search ? 4 : 3; + return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER; + + PAT_CASE(0x001, RANGE, OPS, OSP, 1); + PAT_CASE(0x010, RANGE, POS, PSO, 1); + PAT_CASE(0x011, RANGE, OPS, POS, 2); + PAT_CASE(0x100, RANGE, SPO, SOP, 1); + PAT_CASE(0x101, RANGE, SOP, OSP, 2); + PAT_CASE(0x110, RANGE, SPO, PSO, 2); + } + + if (*mode == RANGE) { + if (serd_model_has_index(model, &good[0], n_prefix, graph_search)) { + return good[0]; + } else if (serd_model_has_index( + model, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + // Not so good orderings that require filtering, but can + // still be constrained to a range + switch (sig) { + PAT_CASE(0x011, FILTER_RANGE, OSP, PSO, 1); + PAT_CASE(0x101, FILTER_RANGE, SPO, OPS, 1); + // SPO is always present, so 0x110 is never reached here + default: break; + } + + if (*mode == FILTER_RANGE) { + if (serd_model_has_index(model, &good[0], n_prefix, graph_search)) { + return good[0]; + } else if (serd_model_has_index( + model, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + if (graph_search) { + *mode = FILTER_RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + } else { + *mode = FILTER_ALL; + return DEFAULT_ORDER; + } +} + +SerdModel* +serd_model_new(SerdWorld* world, SerdModelFlags flags) +{ + SerdModel* model = (SerdModel*)calloc(1, sizeof(struct SerdModelImpl)); + + flags |= SERD_INDEX_SPO; // SPO index is mandatory + + model->world = world; + model->flags = flags; + + for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) { + const int* const ordering = orderings[i]; + const int* const g_ordering = orderings[i + (NUM_ORDERS / 2)]; + + if (flags & (1 << i)) { + model->indices[i] = + zix_btree_new((ZixComparator)serd_triple_compare, + (const void*)ordering, + NULL); + if (flags & SERD_INDEX_GRAPHS) { + model->indices[i + (NUM_ORDERS / 2)] = + zix_btree_new((ZixComparator)serd_quad_compare, + (const void*)g_ordering, + NULL); + } + } + } + + // Create end iterator + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + ZixBTreeIter* cur = zix_btree_end(model->indices[order]); + const SerdQuad pat = { 0, 0, 0, 0 }; + + model->end = serd_iter_new(model, cur, pat, order, ALL, 0); + + return model; +} + +SerdModel* +serd_model_copy(const SerdModel* model) +{ + SerdModel* copy = serd_model_new(model->world, model->flags); + + SerdRange* all = serd_model_all(model); + serd_model_add_range(copy, all); + serd_range_free(all); + + assert(serd_model_size(model) == serd_model_size(copy)); + return copy; +} + +SERD_API +bool +serd_model_equals(const SerdModel* a, const SerdModel* b) +{ + if (!a && !b) { + return true; + } else if (!a || !b || serd_model_size(a) != serd_model_size(b)) { + return false; + } + + SerdRange* ra = serd_model_all(a); + SerdRange* rb = serd_model_all(b); + + bool result = true; + while (!serd_range_empty(ra) && !serd_range_empty(rb)) { + if (!serd_statement_equals(serd_range_front(ra), + serd_range_front(rb))) { + result = false; + break; + } + + serd_range_next(ra); + serd_range_next(rb); + } + + result = result && serd_range_empty(ra) && serd_range_empty(rb); + + serd_range_free(ra); + serd_range_free(rb); + return result; +} + +static void +serd_model_drop_statement(SerdModel* model, SerdStatement* statement) + +{ + for (int i = 0; i < TUP_LEN; ++i) { + if (statement->nodes[i]) { + serd_nodes_deref(model->world->nodes, statement->nodes[i]); + } + } + + if (statement->cursor && statement->cursor->file) { + serd_nodes_deref(model->world->nodes, statement->cursor->file); + } + + free(statement->cursor); + free(statement); +} + +void +serd_model_free(SerdModel* model) +{ + if (!model) { + return; + } + + serd_iter_free(model->end); + + ZixBTree* index = model->indices[model->indices[DEFAULT_GRAPH_ORDER] + ? DEFAULT_GRAPH_ORDER + : DEFAULT_ORDER]; + + // Free statements + ZixBTreeIter* t = zix_btree_begin(index); + for (; !zix_btree_iter_is_end(t); zix_btree_iter_increment(t)) { + SerdStatement* s = (SerdStatement*)zix_btree_get(t); + free(s->cursor); + free(s); + } + zix_btree_iter_free(t); + + // Free indices + for (unsigned o = 0; o < NUM_ORDERS; ++o) { + if (model->indices[o]) { + zix_btree_free(model->indices[o]); + } + } + + free(model); +} + +SerdWorld* +serd_model_get_world(SerdModel* model) +{ + return model->world; +} + +size_t +serd_model_size(const SerdModel* model) +{ + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + return zix_btree_size(model->indices[order]); +} + +bool +serd_model_empty(const SerdModel* model) +{ + return serd_model_size(model) == 0; +} + +SerdIter* +serd_model_begin(const SerdModel* model) +{ + if (serd_model_size(model) == 0) { + return serd_iter_copy(serd_model_end(model)); + } else { + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + ZixBTreeIter* cur = zix_btree_begin(model->indices[order]); + const SerdQuad pat = { 0, 0, 0, 0 }; + return serd_iter_new(model, cur, pat, order, ALL, 0); + } +} + +const SerdIter* +serd_model_end(const SerdModel* model) +{ + return model->end; +} + +SerdRange* +serd_model_all(const SerdModel* model) +{ + return serd_range_new(serd_model_begin(model), + serd_iter_copy(serd_model_end(model))); +} + +SerdIter* +serd_model_find(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + const SerdQuad pat = { s, p, o, g }; + if (!pat[0] && !pat[1] && !pat[2] && !pat[3]) { + return serd_model_begin(model); + } + + SearchMode mode; + int n_prefix; + const SerdOrder index_order = + serd_model_best_index(model, pat, &mode, &n_prefix); + + SERD_FIND_LOG("Find " TUP_FMT " index=%s mode=%d n_prefix=%d\n", + TUP_FMT_ARGS(pat), + order_names[index_order], + mode, + n_prefix); + + ZixBTree* const db = model->indices[index_order]; + ZixBTreeIter* cur = NULL; + + if (mode == FILTER_ALL) { + // No prefix shared with an index at all, linear search (worst case) + SERD_FIND_LOG("warning: Linear model search\n"); + cur = zix_btree_begin(db); + } else if (mode == FILTER_RANGE) { + /* Some prefix, but filtering still required. Build a search pattern + with only the prefix to find the lower bound in log time. */ + SERD_FIND_LOG("warning: Linear range search\n"); + SerdQuad prefix_pat = { NULL, NULL, NULL, NULL }; + const int* const ordering = orderings[index_order]; + for (int i = 0; i < n_prefix; ++i) { + prefix_pat[ordering[i]] = pat[ordering[i]]; + } + zix_btree_lower_bound(db, prefix_pat, &cur); + } else { + // Ideal case, pattern matches an index with no filtering required + zix_btree_lower_bound(db, pat, &cur); + } + + if (zix_btree_iter_is_end(cur)) { + SERD_FIND_LOG("No match found, iterator at end\n"); + zix_btree_iter_free(cur); + return NULL; + } + + const SerdStatement* const key = (const SerdStatement*)zix_btree_get(cur); + if (!key || (mode == RANGE && !serd_statement_matches_quad(key, pat))) { + SERD_FIND_LOG("No match found, cursor at " TUP_FMT "\n", + TUP_FMT_ARGS(key->nodes)); + + zix_btree_iter_free(cur); + return NULL; + } + + return serd_iter_new(model, cur, pat, index_order, mode, n_prefix); +} + +SerdRange* +serd_model_range(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if (!s && !p && !o && !g) { + return serd_range_new(serd_model_begin(model), + serd_iter_copy(serd_model_end(model))); + } + + SerdIter* begin = serd_model_find(model, s, p, o, g); + SerdIter* end = begin ? serd_iter_new(model, + NULL, + begin->pat, + begin->order, + begin->mode, + begin->n_prefix) + : NULL; + return serd_range_new(begin, end); +} + +const SerdNode* +serd_model_get(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + const SerdStatement* statement = + serd_model_get_statement(model, s, p, o, g); + + if (statement) { + if (!s) { + return serd_statement_get_subject(statement); + } else if (!p) { + return serd_statement_get_predicate(statement); + } else if (!o) { + return serd_statement_get_object(statement); + } + } + + return NULL; +} + +const SerdStatement* +serd_model_get_statement(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if ((bool)s + (bool)p + (bool)o != 2) { + return NULL; + } + + SerdIter* const i = serd_model_find(model, s, p, o, g); + if (i && i->cur) { + const SerdStatement* statement = serd_iter_get(i); + serd_iter_free(i); + return statement; + } + + return NULL; +} + +size_t +serd_model_count(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdRange* const range = serd_model_range(model, s, p, o, g); + const size_t count = serd_range_size(range); + serd_range_free(range); + return count; +} + +bool +serd_model_ask(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdIter* iter = serd_model_find(model, s, p, o, g); + bool ret = (iter != NULL); + serd_iter_free(iter); + return ret; +} + +static SerdCursor* +serd_model_intern_cursor(SerdModel* model, const SerdCursor* cursor) +{ + if (cursor) { + SerdCursor* copy = (SerdCursor*)calloc(1, sizeof(SerdCursor)); + + copy->file = serd_nodes_intern(model->world->nodes, cursor->file); + copy->line = cursor->line; + copy->col = cursor->col; + return copy; + } + + return NULL; +} + +SerdStatus +serd_model_add_internal(SerdModel* model, + const SerdCursor* cursor, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdStatement* statement = (SerdStatement*)calloc(1, sizeof(SerdStatement)); + + statement->nodes[0] = s; + statement->nodes[1] = p; + statement->nodes[2] = o; + statement->nodes[3] = g; + statement->cursor = serd_model_intern_cursor(model, cursor); + + SERD_WRITE_LOG("Add " TUP_FMT "\n", TUP_FMT_ARGS(statement->nodes)); + + bool added = false; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (model->indices[i]) { + if (!zix_btree_insert(model->indices[i], statement)) { + added = true; + } else if (i == GSPO) { + break; // Statement already indexed + } + } + } + + ++model->version; + if (added) { + return SERD_SUCCESS; + } + + serd_model_drop_statement(model, statement); + return SERD_FAILURE; +} + +SerdStatus +serd_model_add(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if (!s || !p || !o) { + return serd_world_errorf(model->world, + SERD_ERR_BAD_ARG, + "attempt to add statement with NULL field\n"); + } + + return serd_model_add_internal(model, + NULL, + serd_nodes_intern(model->world->nodes, s), + serd_nodes_intern(model->world->nodes, p), + serd_nodes_intern(model->world->nodes, o), + serd_nodes_intern(model->world->nodes, g)); +} + +SerdStatus +serd_model_insert(SerdModel* model, const SerdStatement* statement) +{ + return serd_model_add_internal(model, + serd_statement_get_cursor(statement), + serd_statement_get_subject(statement), + serd_statement_get_predicate(statement), + serd_statement_get_object(statement), + serd_statement_get_graph(statement)); +} + +SerdStatus +serd_model_add_range(SerdModel* model, SerdRange* range) +{ + SerdStatus st = SERD_SUCCESS; + for (; !st && !serd_range_empty(range); serd_range_next(range)) { + st = serd_model_insert(model, serd_range_front(range)); + } + + return st; +} + +SerdStatus +serd_model_erase(SerdModel* model, SerdIter* iter) +{ + const SerdStatement* statement = serd_iter_get(iter); + + SERD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(statement->nodes)); + + SerdStatement* removed = NULL; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (model->indices[i]) { + zix_btree_remove(model->indices[i], + statement, + (void**)&removed, + i == iter->order ? &iter->cur : NULL); + } + } + serd_iter_scan_next(iter); + + serd_model_drop_statement(model, removed); + iter->version = ++model->version; + + return SERD_SUCCESS; +} + +SerdStatus +serd_model_erase_range(SerdModel* model, SerdRange* range) +{ + while (!serd_range_empty(range)) { + serd_model_erase(model, range->begin); + } + + return SERD_SUCCESS; +} diff --git a/src/model.h b/src/model.h new file mode 100644 index 00000000..b4fdbfa3 --- /dev/null +++ b/src/model.h @@ -0,0 +1,43 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_MODEL_H +#define SERD_MODEL_H + +#include "serd/serd.h" +#include "zix/btree.h" + +#include +#include + +struct SerdModelImpl +{ + SerdWorld* world; ///< World this model is a part of + SerdModelFlags flags; ///< Active indices and features + ZixBTree* indices[12]; ///< Trees of SordQuad + SerdIter* end; ///< End iterator (always the same) + uint64_t version; ///< Version incremented on every change +}; + +SerdStatus +serd_model_add_internal(SerdModel* model, + const SerdCursor* cursor, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +#endif // SERD_MODEL_H diff --git a/src/node.c b/src/node.c index 65bc6fb6..48511ca4 100644 --- a/src/node.c +++ b/src/node.c @@ -331,6 +331,13 @@ serd_node_compare(const SerdNode* a, const SerdNode* b) serd_node_maybe_get_meta_c(b)); } +/** Compare nodes, considering NULL a wildcard match. */ +int +serd_node_wildcard_compare(const SerdNode* a, const SerdNode* b) +{ + return (!a || !b) ? 0 : serd_node_compare(a, b); +} + static size_t serd_uri_string_length(const SerdURI* uri) { diff --git a/src/node.h b/src/node.h index 53936ca3..afe3d93f 100644 --- a/src/node.h +++ b/src/node.h @@ -46,5 +46,6 @@ void serd_node_set(SerdNode** dst, const SerdNode* src); size_t serd_node_total_size(const SerdNode* node); void serd_node_zero_pad(SerdNode* node); SerdNode* serd_new_resolved_uri_i(const char* str, const SerdURI* base); +int serd_node_wildcard_compare(const SerdNode* a, const SerdNode* b); #endif // SERD_NODE_H diff --git a/src/range.c b/src/range.c new file mode 100644 index 00000000..973c6a81 --- /dev/null +++ b/src/range.c @@ -0,0 +1,250 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "range.h" + +#include "iter.h" +#include "log.h" +#include "model.h" +#include "sink.h" +#include "world.h" + +#include +#include + +typedef enum { NAMED, ANON_S, ANON_O, LIST_S, LIST_O } NodeStyle; + +static SerdStatus +write_range_statement(SerdSink* sink, + const SerdModel* model, + unsigned depth, + SerdStatementFlags flags, + const SerdStatement* statement); + +SerdRange* +serd_range_new(SerdIter* const begin, SerdIter* const end) +{ + SerdRange* range = (SerdRange*)malloc(sizeof(SerdRange)); + + range->begin = begin; + range->end = end; + + return range; +} + +SerdRange* +serd_range_copy(const SerdRange* range) +{ + SerdRange* copy = (SerdRange*)malloc(sizeof(SerdRange)); + + memcpy(copy, range, sizeof(SerdRange)); + copy->begin = serd_iter_copy(range->begin); + copy->end = serd_iter_copy(range->end); + + return copy; +} + +void +serd_range_free(SerdRange* range) +{ + serd_iter_free(range->begin); + serd_iter_free(range->end); + free(range); +} + +const SerdStatement* +serd_range_front(const SerdRange* range) +{ + return serd_iter_get(range->begin); +} + +bool +serd_range_equals(const SerdRange* lhs, const SerdRange* rhs) +{ + return (!lhs && !rhs) || + (lhs && rhs && serd_iter_equals(lhs->begin, rhs->begin) && + serd_iter_equals(lhs->end, rhs->end)); +} + +bool +serd_range_next(SerdRange* range) +{ + return serd_iter_next(range->begin); +} + +size_t +serd_range_size(const SerdRange* range) +{ + SerdRange* r = serd_range_copy(range); + uint64_t n = 0; + for (; !serd_range_empty(r); serd_range_next(r)) { + ++n; + } + serd_range_free(r); + return n; +} + +bool +serd_range_empty(const SerdRange* range) +{ + return !range || serd_iter_equals(range->begin, range->end); +} + +const SerdIter* +serd_range_begin(const SerdRange* range) +{ + return range->begin; +} + +const SerdIter* +serd_range_end(const SerdRange* range) +{ + return range->end; +} + +static NodeStyle +get_node_style(const SerdModel* model, const SerdNode* object) +{ + const size_t num_as_object = + serd_model_count(model, NULL, NULL, object, NULL); + if (serd_node_get_type(object) != SERD_BLANK || num_as_object > 1) { + return NAMED; // Non-blank and/or blank referred to several times + } + + if (serd_model_ask(model, object, model->world->rdf_first, NULL, NULL) && + serd_model_ask(model, object, model->world->rdf_rest, NULL, NULL)) { + return num_as_object == 0 ? LIST_S : LIST_O; + } + + return num_as_object == 0 ? ANON_S : ANON_O; +} + +static SerdStatus +write_range_internal(SerdSink* sink, + const unsigned depth, + const SerdStatementFlags flags, + SerdRange* range) +{ + (void)flags; + + SerdStatus st = SERD_SUCCESS; + const SerdModel* model = range && range->begin ? range->begin->model : NULL; + for (; !st && !serd_range_empty(range); serd_range_next(range)) { + st = write_range_statement( + sink, model, depth, 0, serd_range_front(range)); + } + + return st; +} + +static SerdStatus +write_list(SerdSink* sink, + const SerdModel* model, + const unsigned depth, + const SerdNode* object) +{ + SerdStatus st = SERD_SUCCESS; + const SerdWorld* world = model->world; + const SerdNode* first = world->rdf_first; + const SerdNode* rest = world->rdf_rest; + const SerdNode* nil = world->rdf_nil; + + SerdIter* f = serd_model_find(model, object, first, NULL, NULL); + while (!st && f && !serd_node_equals(object, nil)) { + const SerdStatement* fs = serd_iter_get(f); + st = write_range_statement(sink, model, depth + 1, 0, fs); + serd_iter_free(f); + + SerdIter* const r = serd_model_find(model, object, rest, NULL, NULL); + const SerdNode* next = + r ? serd_statement_get_object(serd_iter_get(r)) : NULL; + + f = next ? serd_model_find(model, next, first, NULL, NULL) : NULL; + if (r && f) { + // This and next node are good, write rdf:next statement + st = serd_sink_write_statement(sink, 0, serd_iter_get(r)); + object = next; + } else { + // Terminate malformed list + st = serd_sink_write( + sink, 0, object, rest, nil, serd_statement_get_graph(fs)); + f = NULL; + } + + serd_iter_free(r); + } + + return st; +} + +static SerdStatus +write_range_statement(SerdSink* sink, + const SerdModel* model, + const unsigned depth, + SerdStatementFlags flags, + const SerdStatement* statement) +{ + const SerdNode* subject = serd_statement_get_subject(statement); + const NodeStyle subject_style = get_node_style(model, subject); + + if (depth == 0 && (subject_style == ANON_O || subject_style == LIST_O)) { + return SERD_SUCCESS; // Skip subject that will be inlined somewhere + } else if (subject_style == ANON_S) { + flags |= SERD_EMPTY_S; // Write anonymous subjects in "[] p o" style + } + + const SerdNode* object = serd_statement_get_object(statement); + const NodeStyle object_style = get_node_style(model, object); + SerdStatus st = SERD_SUCCESS; + if (object_style == ANON_O) { + flags |= SERD_ANON_O; + + SerdRange* subrange = serd_model_range(model, object, NULL, NULL, NULL); + st = st ? st : serd_sink_write_statement(sink, flags, statement); + st = st ? st : write_range_internal(sink, depth + 1, 0, subrange); + st = st ? st : serd_sink_end(sink, object); + serd_range_free(subrange); + + return st; + } else if (object_style == LIST_O) { + flags |= SERD_LIST_O; + st = st ? st : serd_sink_write_statement(sink, flags, statement); + st = st ? st : write_list(sink, model, depth + 1, object); + return st; + } + + return serd_sink_write_statement(sink, flags, statement); +} + +SerdStatus +serd_range_serialise(const SerdRange* range, + SerdSink* sink, + const SerdSerialisationFlags flags) +{ + SerdStatus st = SERD_SUCCESS; + SerdRange* const copy = serd_range_copy(range); + + if (flags & SERD_NO_INLINE_OBJECTS) { + for (; !st && !serd_range_empty(copy); serd_range_next(copy)) { + st = sink->statement(sink->handle, 0, serd_range_front(copy)); + } + } else { + st = write_range_internal(sink, 0, 0, copy); + } + + serd_range_free(copy); + return st; +} diff --git a/src/range.h b/src/range.h new file mode 100644 index 00000000..d07741ef --- /dev/null +++ b/src/range.h @@ -0,0 +1,36 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_RANGE_H +#define SERD_RANGE_H + +#include "iter.h" +#include "statement.h" + +#include "serd/serd.h" + +struct SerdRangeImpl { + SerdIter* begin; ///< Iterator to first statement in range + SerdIter* end; ///< Iterator to end of range +}; + +SerdRange* +serd_range_new(SerdIter* begin, SerdIter* end); + +bool +serd_range_scan_next(SerdRange* range); + +#endif // SERD_RANGE_H diff --git a/src/serdi.c b/src/serdi.c index 0b33ae77..46c5f47a 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -51,10 +51,12 @@ print_usage(const char* name, bool error) fprintf(os, " -b Fast bulk output for large serialisations.\n"); fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); fprintf(os, " -e Eat input one character at a time.\n"); + fprintf(os, " -f Fast serialisation without object inlining\n"); fprintf(os, " -h Display this help and exit.\n"); fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); fprintf(os, " -k BYTES Parser stack size.\n"); fprintf(os, " -l Lax (non-strict) parsing.\n"); + fprintf(os, " -m Build and serialise a model (no streaming).\n"); fprintf(os, " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"); fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); fprintf(os, " -q Suppress all output except data.\n"); @@ -93,7 +95,9 @@ main(int argc, char** argv) bool ascii = false; bool bulk_read = true; bool bulk_write = false; + bool no_inline = false; bool lax = false; + bool use_model = false; bool quiet = false; size_t stack_size = 4194304; const char* add_prefix = NULL; @@ -110,10 +114,14 @@ main(int argc, char** argv) bulk_write = true; } else if (argv[a][1] == 'e') { bulk_read = false; + } else if (argv[a][1] == 'f') { + no_inline = true; } else if (argv[a][1] == 'h') { return print_usage(argv[0], false); } else if (argv[a][1] == 'l') { lax = true; + } else if (argv[a][1] == 'm') { + use_model = true; } else if (argv[a][1] == 'q') { quiet = true; } else if (argv[a][1] == 'v') { @@ -195,6 +203,9 @@ main(int argc, char** argv) const SerdWriterFlags writer_flags = (ascii ? SERD_STYLE_ASCII : 0); + const SerdSerialisationFlags serialisation_flags = + no_inline ? SERD_NO_INLINE_OBJECTS : 0; + SerdByteSink* byte_sink = serd_byte_sink_new( (SerdWriteFunc)fwrite, out_fd, bulk_write ? 4096 : 1); @@ -205,9 +216,22 @@ main(int argc, char** argv) (SerdWriteFunc)serd_byte_sink_write, byte_sink); - SerdReader* reader = serd_reader_new( - world, input_syntax, serd_writer_get_sink(writer), stack_size); + SerdReader* reader = NULL; + SerdModel* model = NULL; + SerdInserter* inserter = NULL; + const SerdSink* sink = NULL; + if (use_model) { + const SerdModelFlags flags = + SERD_INDEX_SPO | (input_has_graphs ? SERD_INDEX_GRAPHS : 0) | + (no_inline ? 0 : SERD_INDEX_OPS); + model = serd_model_new(world, flags); + inserter = serd_inserter_new(model, env, NULL); + sink = serd_inserter_get_sink(inserter); + } else { + sink = serd_writer_get_sink(writer); + } + reader = serd_reader_new(world, input_syntax, sink, stack_size); serd_reader_set_strict(reader, !lax); if (quiet) { serd_world_set_message_sink(world, quiet_error_sink, NULL); @@ -243,6 +267,19 @@ main(int argc, char** argv) } serd_reader_finish(reader); + + if (status <= SERD_FAILURE && use_model) { + SerdSink* wsink = serd_writer_get_sink(writer); + serd_env_send_prefixes(env, wsink); + + SerdRange* range = serd_model_all(model); + status = serd_range_serialise(range, wsink, serialisation_flags); + serd_range_free(range); + } + + serd_node_free(input_name); + serd_inserter_free(inserter); + serd_model_free(model); serd_reader_free(reader); serd_writer_free(writer); serd_byte_sink_free(byte_sink); diff --git a/src/statement.c b/src/statement.c index 24440a38..347cfe78 100644 --- a/src/statement.c +++ b/src/statement.c @@ -51,3 +51,35 @@ serd_statement_get_cursor(const SerdStatement* statement) { return statement->cursor; } + +bool +serd_statement_equals(const SerdStatement* a, const SerdStatement* b) +{ + return (serd_node_equals(a->nodes[0], b->nodes[0]) && + serd_node_equals(a->nodes[1], b->nodes[1]) && + serd_node_equals(a->nodes[2], b->nodes[2]) && + serd_node_equals(a->nodes[3], b->nodes[3])); +} + +bool +serd_statement_matches(const SerdStatement* statement, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph) + +{ + return (serd_node_pattern_match(statement->nodes[0], subject) && + serd_node_pattern_match(statement->nodes[1], predicate) && + serd_node_pattern_match(statement->nodes[2], object) && + serd_node_pattern_match(statement->nodes[3], graph)); +} + +bool +serd_statement_matches_quad(const SerdStatement* statement, const SerdQuad quad) +{ + return serd_node_pattern_match(statement->nodes[0], quad[0]) && + serd_node_pattern_match(statement->nodes[1], quad[1]) && + serd_node_pattern_match(statement->nodes[2], quad[2]) && + serd_node_pattern_match(statement->nodes[3], quad[3]); +} diff --git a/src/statement.h b/src/statement.h index 7c8384e9..4a1056f2 100644 --- a/src/statement.h +++ b/src/statement.h @@ -19,6 +19,8 @@ #include "serd/serd.h" +#include + /** Quad of nodes (a statement), or a quad pattern. @@ -31,4 +33,14 @@ struct SerdStatementImpl { SerdCursor* cursor; }; +static inline bool +serd_node_pattern_match(const SerdNode* a, const SerdNode* b) +{ + return !a || !b || (a == b) || serd_node_equals(a, b); +} + +bool +serd_statement_matches_quad(const SerdStatement* statement, + const SerdQuad quad); + #endif // SERD_STATEMENT_H diff --git a/src/string.c b/src/string.c index d6f98388..10cd537a 100644 --- a/src/string.c +++ b/src/string.c @@ -39,6 +39,7 @@ serd_strerror(SerdStatus status) case SERD_ERR_UNKNOWN: return "Unknown error"; case SERD_ERR_BAD_SYNTAX: return "Invalid syntax"; case SERD_ERR_BAD_ARG: return "Invalid argument"; + case SERD_ERR_BAD_ITER: return "Invalid iterator"; case SERD_ERR_NOT_FOUND: return "Not found"; case SERD_ERR_ID_CLASH: return "Blank node ID clash"; case SERD_ERR_BAD_CURIE: return "Invalid CURIE"; diff --git a/src/zix/btree.c b/src/zix/btree.c index 7521086d..a55c4699 100644 --- a/src/zix/btree.c +++ b/src/zix/btree.c @@ -55,6 +55,7 @@ typedef struct { } ZixBTreeIterFrame; struct ZixBTreeIterImpl { + unsigned n_levels; ///< Maximum depth of stack unsigned level; ///< Current level in stack ZixBTreeIterFrame stack[]; ///< Position stack }; @@ -350,7 +351,9 @@ zix_btree_iter_new(const ZixBTree* const t) { const size_t s = t->height * sizeof(ZixBTreeIterFrame); - return (ZixBTreeIter*)calloc(1, sizeof(ZixBTreeIter) + s); + ZixBTreeIter* i = (ZixBTreeIter*)calloc(1, sizeof(ZixBTreeIter) + s); + i->n_levels = t->height; + return i; } ZIX_PRIVATE void @@ -712,12 +715,47 @@ zix_btree_begin(const ZixBTree* const t) return i; } +ZIX_API ZixBTreeIter* +zix_btree_end(const ZixBTree* const t) +{ + return zix_btree_iter_new(t); +} + +ZIX_API ZixBTreeIter* +zix_btree_iter_copy(const ZixBTreeIter* const i) +{ + if (!i) { + return NULL; + } + + const size_t s = i->n_levels * sizeof(ZixBTreeIterFrame); + + ZixBTreeIter* j = (ZixBTreeIter*)malloc(sizeof(ZixBTreeIter) + s); + memcpy(j, i, sizeof(ZixBTreeIter) + s); + return j; +} + ZIX_API bool zix_btree_iter_is_end(const ZixBTreeIter* const i) { return !i || i->stack[0].node == NULL; } +ZIX_API bool +zix_btree_iter_equals(const ZixBTreeIter* const lhs, const ZixBTreeIter* const rhs) +{ + if (zix_btree_iter_is_end(lhs) && zix_btree_iter_is_end(rhs)) { + return true; + } else if (!lhs || !rhs || lhs->n_levels != rhs->n_levels) { + return false; + } + + return !memcmp(lhs, + rhs, + sizeof(ZixBTreeIter) + + lhs->n_levels * sizeof(ZixBTreeIterFrame)); +} + ZIX_API void zix_btree_iter_increment(ZixBTreeIter* const i) { diff --git a/src/zix/btree.h b/src/zix/btree.h index 46daa24a..0f71d1e3 100644 --- a/src/zix/btree.h +++ b/src/zix/btree.h @@ -125,6 +125,27 @@ zix_btree_get(const ZixBTreeIter* ti); ZIX_API ZixBTreeIter* zix_btree_begin(const ZixBTree* t); +/** + Return an iterator to the end of `t` (one past the last element). + + The returned iterator must be freed with zix_btree_iter_free(). +*/ +ZIX_API ZixBTreeIter* +zix_btree_end(const ZixBTree* t); + +/** + Return a new copy of `i`. +*/ +ZIX_API ZixBTreeIter* +zix_btree_iter_copy(const ZixBTreeIter* const i); + +/** + Return true iff `lhs` is equal to `rhs`. +*/ +ZIX_API bool +zix_btree_iter_equals(const ZixBTreeIter* const lhs, + const ZixBTreeIter* const rhs); + /** Return true iff `i` is an iterator to the end of its tree. */ diff --git a/tests/model_test.c b/tests/model_test.c new file mode 100644 index 00000000..273fedbb --- /dev/null +++ b/tests/model_test.c @@ -0,0 +1,742 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#undef NDEBUG + +#include +#include +#include +#include +#include + +#include "serd/serd.h" + +#define WILDCARD_NODE NULL + +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define RDF_FIRST NS_RDF "first" +#define RDF_REST NS_RDF "rest" + +#define N_OBJECTS_PER 2U + +typedef const SerdNode* Quad[4]; + +typedef struct +{ + Quad query; + int expected_num_results; +} QueryTest; + +static const SerdNode* +manage(SerdWorld* world, SerdNode* node) +{ + return serd_nodes_manage(serd_world_get_nodes(world), node); +} + +static const SerdNode* +uri(SerdWorld* world, const size_t num) +{ + char str[] = "eg:000"; + snprintf(str + 3, 4, "%03zu", num); + return manage(world, serd_new_uri(str)); +} + +static int +generate(SerdWorld* world, + SerdModel* model, + size_t n_quads, + const SerdNode* graph) +{ + SerdNodes* nodes = serd_world_get_nodes(world); + + for (size_t i = 0; i < n_quads; ++i) { + size_t num = (i * N_OBJECTS_PER) + 1U; + + const SerdNode* ids[2 + N_OBJECTS_PER]; + for (size_t j = 0; j < 2 + N_OBJECTS_PER; ++j) { + ids[j] = uri(world, num++); + } + + for (size_t j = 0; j < N_OBJECTS_PER; ++j) { + assert(!serd_model_add(model, ids[0], ids[1], ids[2 + j], graph)); + } + } + + // Add some literals + + // (98 4 "hello") and (98 4 "hello"^^<5>) + const SerdNode* hello = manage(world, serd_new_string("hello")); + const SerdNode* hello_gb = + manage(world, serd_new_plain_literal("hello", "en-gb")); + const SerdNode* hello_us = + manage(world, serd_new_plain_literal("hello", "en-us")); + const SerdNode* hello_t4 = serd_nodes_manage( + nodes, serd_new_typed_literal("hello", uri(world, 4))); + const SerdNode* hello_t5 = serd_nodes_manage( + nodes, serd_new_typed_literal("hello", uri(world, 5))); + assert(!serd_model_add(model, uri(world, 98), uri(world, 4), hello, graph)); + assert(!serd_model_add( + model, uri(world, 98), uri(world, 4), hello_t5, graph)); + + // (96 4 "hello"^^<4>) and (96 4 "hello"^^<5>) + assert(!serd_model_add( + model, uri(world, 96), uri(world, 4), hello_t4, graph)); + assert(!serd_model_add( + model, uri(world, 96), uri(world, 4), hello_t5, graph)); + + // (94 5 "hello") and (94 5 "hello"@en-gb) + assert(!serd_model_add(model, uri(world, 94), uri(world, 5), hello, graph)); + assert(!serd_model_add( + model, uri(world, 94), uri(world, 5), hello_gb, graph)); + + // (92 6 "hello"@en-us) and (92 6 "hello"@en-gb) + assert(!serd_model_add( + model, uri(world, 92), uri(world, 6), hello_us, graph)); + assert(!serd_model_add( + model, uri(world, 92), uri(world, 6), hello_gb, graph)); + + // (14 6 "bonjour"@fr) and (14 6 "salut"@fr) + const SerdNode* bonjour = + manage(world, serd_new_plain_literal("bonjour", "fr")); + const SerdNode* salut = + manage(world, serd_new_plain_literal("salut", "fr")); + assert(!serd_model_add( + model, uri(world, 14), uri(world, 6), bonjour, graph)); + assert(!serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)); + + // Attempt to add duplicates + assert(serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)); + + // Add a blank node subject + const SerdNode* ablank = manage(world, serd_new_blank("ablank")); + assert(!serd_model_add(model, ablank, uri(world, 6), salut, graph)); + + // Add statement with URI object + assert(!serd_model_add(model, ablank, uri(world, 6), uri(world, 7), graph)); + + return EXIT_SUCCESS; +} + +static int +test_read(SerdWorld* world, + SerdModel* model, + const SerdNode* g, + const size_t n_quads) +{ + SerdIter* iter = serd_model_begin(model); + for (; !serd_iter_equals(iter, serd_model_end(model)); + serd_iter_next(iter)) { + const SerdStatement* statement = serd_iter_get(iter); + assert(serd_statement_get_subject(statement)); + assert(serd_statement_get_predicate(statement)); + assert(serd_statement_get_object(statement)); + } + + // Attempt to increment past end + assert(serd_iter_next(iter)); + serd_iter_free(iter); + + const char* s = "hello"; + const SerdNode* plain_hello = manage(world, serd_new_string(s)); + const SerdNode* type4_hello = + manage(world, serd_new_typed_literal(s, uri(world, 4))); + const SerdNode* type5_hello = + manage(world, serd_new_typed_literal(s, uri(world, 5))); + const SerdNode* gb_hello = + manage(world, serd_new_plain_literal(s, "en-gb")); + const SerdNode* us_hello = + manage(world, serd_new_plain_literal(s, "en-us")); + +#define NUM_PATTERNS 18 + + QueryTest patterns[NUM_PATTERNS] = { + {{NULL, NULL, NULL}, (int)(n_quads * N_OBJECTS_PER) + 12}, + {{uri(world, 1), WILDCARD_NODE, WILDCARD_NODE}, 2}, + {{uri(world, 9), uri(world, 9), uri(world, 9)}, 0}, + {{uri(world, 1), uri(world, 2), uri(world, 4)}, 1}, + {{uri(world, 3), uri(world, 4), WILDCARD_NODE}, 2}, + {{WILDCARD_NODE, uri(world, 2), uri(world, 4)}, 1}, + {{WILDCARD_NODE, WILDCARD_NODE, uri(world, 4)}, 1}, + {{uri(world, 1), WILDCARD_NODE, WILDCARD_NODE}, 2}, + {{uri(world, 1), WILDCARD_NODE, uri(world, 4)}, 1}, + {{WILDCARD_NODE, uri(world, 2), WILDCARD_NODE}, 2}, + {{uri(world, 98), uri(world, 4), plain_hello}, 1}, + {{uri(world, 98), uri(world, 4), type5_hello}, 1}, + {{uri(world, 96), uri(world, 4), type4_hello}, 1}, + {{uri(world, 96), uri(world, 4), type5_hello}, 1}, + {{uri(world, 94), uri(world, 5), plain_hello}, 1}, + {{uri(world, 94), uri(world, 5), gb_hello}, 1}, + {{uri(world, 92), uri(world, 6), gb_hello}, 1}, + {{uri(world, 92), uri(world, 6), us_hello}, 1}}; + + Quad match = {uri(world, 1), uri(world, 2), uri(world, 4), g}; + assert(serd_model_ask(model, match[0], match[1], match[2], match[3])); + + Quad nomatch = {uri(world, 1), uri(world, 2), uri(world, 9), g}; + assert(!serd_model_ask( + model, nomatch[0], nomatch[1], nomatch[2], nomatch[3])); + + assert(!serd_model_get(model, NULL, NULL, uri(world, 3), g)); + assert(!serd_model_get(model, uri(world, 1), uri(world, 99), NULL, g)); + + assert(serd_node_equals( + serd_model_get(model, uri(world, 1), uri(world, 2), NULL, g), + uri(world, 3))); + assert(serd_node_equals( + serd_model_get(model, uri(world, 1), NULL, uri(world, 3), g), + uri(world, 2))); + assert(serd_node_equals( + serd_model_get(model, NULL, uri(world, 2), uri(world, 3), g), + uri(world, 1))); + + for (unsigned i = 0; i < NUM_PATTERNS; ++i) { + QueryTest test = patterns[i]; + Quad pat = {test.query[0], test.query[1], test.query[2], g}; + + SerdRange* range = + serd_model_range(model, pat[0], pat[1], pat[2], pat[3]); + int num_results = 0; + for (; !serd_range_empty(range); serd_range_next(range)) { + ++num_results; + assert(serd_statement_matches( + serd_range_front(range), pat[0], pat[1], pat[2], pat[3])); + } + serd_range_free(range); + + assert(num_results == test.expected_num_results); + } + + // Query blank node subject + const SerdNode* ablank = manage(world, serd_new_blank("ablank")); + Quad pat = {ablank, 0, 0}; + int num_results = 0; + SerdRange* range = serd_model_range(model, pat[0], pat[1], pat[2], pat[3]); + for (; !serd_range_empty(range); serd_range_next(range)) { + ++num_results; + const SerdStatement* statement = serd_range_front(range); + assert(serd_statement_matches( + statement, pat[0], pat[1], pat[2], pat[3])); + } + serd_range_free(range); + + assert(num_results == 2); + + // Test nested queries + const SerdNode* last_subject = 0; + range = serd_model_range(model, NULL, NULL, NULL, NULL); + for (; !serd_range_empty(range); serd_range_next(range)) { + const SerdStatement* statement = serd_range_front(range); + const SerdNode* subject = serd_statement_get_subject(statement); + if (subject == last_subject) { + continue; + } + + Quad subpat = {subject, 0, 0}; + SerdRange* subrange = serd_model_range( + model, subpat[0], subpat[1], subpat[2], subpat[3]); + const SerdStatement* substatement = serd_range_front(subrange); + uint64_t num_sub_results = 0; + assert(serd_statement_get_subject(substatement) == subject); + for (; !serd_range_empty(subrange); serd_range_next(subrange)) { + assert(serd_statement_matches(serd_range_front(subrange), + subpat[0], + subpat[1], + subpat[2], + subpat[3])); + ++num_sub_results; + } + serd_range_free(subrange); + assert(num_sub_results == N_OBJECTS_PER); + + uint64_t count = serd_model_count(model, subject, 0, 0, 0); + assert(count == num_sub_results); + + last_subject = subject; + } + serd_range_free(range); + + return 0; +} + +static SerdStatus +unexpected_error(void* handle, const SerdMessage* msg) +{ + (void)handle; + + fprintf(stderr, "error: "); + vfprintf(stderr, msg->fmt, *msg->args); + return SERD_SUCCESS; +} + +static SerdStatus +expected_error(void* handle, const SerdMessage* msg) +{ + (void)handle; + + fprintf(stderr, "expected: "); + vfprintf(stderr, msg->fmt, *msg->args); + return SERD_SUCCESS; +} + +static int +test_free_null(SerdWorld* world, const size_t n_quads) +{ + (void)world; + (void)n_quads; + + serd_model_free(NULL); // Shouldn't crash + return 0; +} + +static int +test_get_world(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + assert(serd_model_get_world(model) == world); + serd_model_free(model); + return 0; +} + +static int +test_all_begin(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + SerdRange* all = serd_model_all(model); + SerdIter* begin = serd_model_find(model, NULL, NULL, NULL, NULL); + assert(serd_iter_equals(serd_range_begin(all), begin)); + + serd_range_free(all); + serd_iter_free(begin); + serd_model_free(model); + return 0; +} + +static int +test_add_null(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + + serd_world_set_message_sink(world, expected_error, NULL); + + assert(serd_model_add(model, 0, 0, 0, 0)); + assert(serd_model_add(model, uri(world, 1), 0, 0, 0)); + assert(serd_model_add(model, uri(world, 1), uri(world, 2), 0, 0)); + assert(serd_model_empty(model)); + + serd_model_free(model); + return 0; +} + +static int +test_add_with_iterator(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + + serd_world_set_message_sink(world, expected_error, NULL); + assert(!serd_model_add( + model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + + // Add a statement with an active iterator + SerdIter* iter = serd_model_begin(model); + assert(!serd_model_add( + model, uri(world, 1), uri(world, 2), uri(world, 4), 0)); + + // Check that iterator has been invalidated + assert(!serd_iter_get(iter)); + assert(serd_iter_next(iter)); + + serd_iter_free(iter); + serd_model_free(model); + return 0; +} + +static int +test_erase_with_iterator(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + + serd_world_set_message_sink(world, expected_error, NULL); + assert(!serd_model_add( + model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + assert(!serd_model_add( + model, uri(world, 4), uri(world, 5), uri(world, 6), 0)); + + // Erase a statement with an active iterator + SerdIter* iter1 = serd_model_begin(model); + SerdIter* iter2 = serd_model_begin(model); + assert(!serd_model_erase(model, iter1)); + + // Check that erased iterator points to the next statement + assert(serd_statement_matches(serd_iter_get(iter1), + uri(world, 4), + uri(world, 5), + uri(world, 6), + 0)); + + // Check that other iterator has been invalidated + assert(!serd_iter_get(iter2)); + assert(serd_iter_next(iter2)); + + serd_iter_free(iter2); + serd_iter_free(iter1); + serd_model_free(model); + return 0; +} + +static int +test_add_erase(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = + serd_model_new(world, SERD_INDEX_SPO | SERD_INDEX_GRAPHS); + + // Add (s p "hello") + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* hello = manage(world, serd_new_string("hello")); + assert(!serd_model_add(model, s, p, hello, 0)); + assert(serd_model_ask(model, s, p, hello, 0)); + + // Add (s p "hi") + const SerdNode* hi = manage(world, serd_new_string("hi")); + assert(!serd_model_add(model, s, p, hi, NULL)); + assert(serd_model_ask(model, s, p, hi, 0)); + + // Erase (s p "hi") + SerdIter* iter = serd_model_find(model, s, p, hi, NULL); + assert(!serd_model_erase(model, iter)); + assert(serd_model_size(model) == 1); + serd_iter_free(iter); + + // Check that erased statement can not be found + SerdRange* empty = serd_model_range(model, s, p, hi, NULL); + assert(serd_range_empty(empty)); + serd_range_free(empty); + + serd_model_free(model); + return 0; +} + +static int +test_erase_all(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + generate(world, model, n_quads, NULL); + + SerdIter* iter = serd_model_begin(model); + while (!serd_iter_equals(iter, serd_model_end(model))) { + assert(!serd_model_erase(model, iter)); + } + + serd_iter_free(iter); + serd_model_free(model); + return 0; +} + +static int +test_copy(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + generate(world, model, n_quads, NULL); + + SerdModel* copy = serd_model_copy(model); + assert(serd_model_equals(model, copy)); + + serd_model_free(model); + serd_model_free(copy); + return 0; +} + +static int +test_equals(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + generate(world, model, n_quads, NULL); + serd_model_add( + model, uri(world, 0), uri(world, 1), uri(world, 2), uri(world, 3)); + + assert(serd_model_equals(NULL, NULL)); + assert(!serd_model_equals(NULL, model)); + assert(!serd_model_equals(model, NULL)); + + SerdModel* empty = serd_model_new(world, SERD_INDEX_SPO); + assert(!serd_model_equals(model, empty)); + + SerdModel* different = serd_model_new(world, SERD_INDEX_SPO); + generate(world, different, n_quads, NULL); + serd_model_add(different, + uri(world, 1), + uri(world, 1), + uri(world, 2), + uri(world, 3)); + + assert(serd_model_size(model) == serd_model_size(different)); + assert(!serd_model_equals(model, different)); + + serd_model_free(model); + serd_model_free(empty); + serd_model_free(different); + return 0; +} + +static int +test_find_past_end(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* o = uri(world, 3); + assert(!serd_model_add(model, s, p, o, 0)); + assert(serd_model_ask(model, s, p, o, 0)); + + const SerdNode* huge = uri(world, 999); + SerdRange* range = serd_model_range(model, huge, huge, huge, 0); + assert(serd_range_empty(range)); + + serd_range_free(range); + serd_model_free(model); + return 0; +} + +static int +test_iter_comparison(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + + serd_world_set_message_sink(world, expected_error, NULL); + assert(!serd_model_add( + model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + + // Add a statement with an active iterator + SerdIter* iter1 = serd_model_begin(model); + SerdIter* iter2 = serd_model_begin(model); + assert(serd_iter_equals(iter1, iter2)); + + serd_iter_next(iter1); + assert(!serd_iter_equals(iter1, iter2)); + + const SerdIter* end = serd_model_end(model); + assert(serd_iter_equals(iter1, end)); + + serd_iter_free(iter2); + serd_iter_free(iter1); + serd_model_free(model); + return 0; +} + +static int +test_triple_index_read(SerdWorld* world, const size_t n_quads) +{ + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = serd_model_new(world, (1 << i)); + generate(world, model, n_quads, 0); + assert(!test_read(world, model, 0, n_quads)); + serd_model_free(model); + } + + return 0; +} + +static int +test_quad_index_read(SerdWorld* world, const size_t n_quads) +{ + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = serd_model_new(world, (1 << i) | SERD_INDEX_GRAPHS); + const SerdNode* graph = uri(world, 42); + generate(world, model, n_quads, graph); + assert(!test_read(world, model, graph, n_quads)); + serd_model_free(model); + } + + return 0; +} + +static int +test_remove_graph(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = + serd_model_new(world, SERD_INDEX_SPO | SERD_INDEX_GRAPHS); + + // Generate a couple of graphs + const SerdNode* graph42 = uri(world, 42); + const SerdNode* graph43 = uri(world, 43); + generate(world, model, 1, graph42); + generate(world, model, 1, graph43); + + // Remove one graph via range + SerdStatus st; + SerdRange* range = serd_model_range(model, NULL, NULL, NULL, graph43); + assert(!(st = serd_model_erase_range(model, range))); + serd_range_free(range); + + // Erase the first tuple (an element in the default graph) + SerdIter* iter = serd_model_begin(model); + assert(!serd_model_erase(model, iter)); + serd_iter_free(iter); + + // Ensure only the other graph is left + Quad pat = {0, 0, 0, graph42}; + for (iter = serd_model_begin(model); + !serd_iter_equals(iter, serd_model_end(model)); + serd_iter_next(iter)) { + assert(serd_statement_matches( + serd_iter_get(iter), pat[0], pat[1], pat[2], pat[3])); + } + serd_iter_free(iter); + + serd_model_free(model); + return 0; +} + +static int +test_default_graph(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = + serd_model_new(world, SERD_INDEX_SPO | SERD_INDEX_GRAPHS); + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* o = uri(world, 3); + const SerdNode* g1 = uri(world, 101); + const SerdNode* g2 = uri(world, 102); + + // Insert the same statement into two graphs + assert(!serd_model_add(model, s, p, o, g1)); + assert(!serd_model_add(model, s, p, o, g2)); + + // Ensure we only see statement once in the default graph + assert(serd_model_count(model, s, p, o, NULL) == 1); + + serd_model_free(model); + return 0; +} + +static int +test_write_bad_list(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = + serd_model_new(world, SERD_INDEX_SPO | SERD_INDEX_GRAPHS); + SerdNodes* nodes = serd_nodes_new(); + const SerdNode* s = manage(world, serd_new_uri("urn:s")); + const SerdNode* p = manage(world, serd_new_uri("urn:p")); + const SerdNode* list1 = manage(world, serd_new_blank("l1")); + const SerdNode* list2 = manage(world, serd_new_blank("l2")); + const SerdNode* nofirst = manage(world, serd_new_blank("nof")); + const SerdNode* norest = manage(world, serd_new_blank("nor")); + const SerdNode* pfirst = manage(world, serd_new_uri(RDF_FIRST)); + const SerdNode* prest = manage(world, serd_new_uri(RDF_REST)); + const SerdNode* val1 = manage(world, serd_new_string("a")); + const SerdNode* val2 = manage(world, serd_new_string("b")); + + // List where second node has no rdf:first + serd_model_add(model, s, p, list1, NULL); + serd_model_add(model, list1, pfirst, val1, NULL); + serd_model_add(model, list1, prest, nofirst, NULL); + + // List where second node has no rdf:rest + serd_model_add(model, s, p, list2, NULL); + serd_model_add(model, list2, pfirst, val1, NULL); + serd_model_add(model, list2, prest, norest, NULL); + serd_model_add(model, norest, pfirst, val2, NULL); + + SerdBuffer buffer = {NULL, 0}; + SerdEnv* env = serd_env_new(NULL); + SerdWriter* writer = serd_writer_new( + world, SERD_TURTLE, 0, env, serd_buffer_sink, &buffer); + + SerdRange* all = serd_model_all(model); + serd_range_serialise(all, serd_writer_get_sink(writer), 0); + serd_range_free(all); + + serd_writer_finish(writer); + const char* str = serd_buffer_sink_finish(&buffer); + const char* expected = "\n" + " (\n" + " \"a\"\n" + " ) , (\n" + " \"a\"\n" + " \"b\"\n" + " ) .\n"; + + assert(!strcmp(str, expected)); + + free(buffer.buf); + serd_writer_free(writer); + serd_model_free(model); + serd_env_free(env); + serd_nodes_free(nodes); + return 0; +} + +int +main(void) +{ + static const size_t n_quads = 300; + + serd_model_free(NULL); // Shouldn't crash + + typedef int (*TestFunc)(SerdWorld*, size_t); + + const TestFunc tests[] = {test_free_null, + test_get_world, + test_all_begin, + test_add_null, + test_add_with_iterator, + test_erase_with_iterator, + test_add_erase, + test_erase_all, + test_copy, + test_equals, + test_find_past_end, + test_iter_comparison, + test_triple_index_read, + test_quad_index_read, + test_remove_graph, + test_default_graph, + test_write_bad_list, + NULL}; + + SerdWorld* world = serd_world_new(); + for (const TestFunc* t = tests; *t; ++t) { + serd_world_set_message_sink(world, unexpected_error, NULL); + if ((*t)(world, n_quads)) { + return EXIT_FAILURE; + } + } + + serd_world_free(world); + return 0; +} diff --git a/wscript b/wscript index cc2b0ed5..72d9c5e5 100644 --- a/wscript +++ b/wscript @@ -33,6 +33,9 @@ def options(ctx): 'largefile': 'build with large file support on 32-bit systems', 'no-posix': 'do not use POSIX functions, even if present'}) + opt.add_option('--dump', type='string', default='', dest='dump', + help='dump debugging output (iter, search, write, all)') + def configure(conf): autowaf.display_header('Serd Configuration') conf.load('compiler_c', cache=True) @@ -64,6 +67,14 @@ def configure(conf): defines = ['_POSIX_C_SOURCE=200809L'], mandatory = False) + dump = Options.options.dump.split(',') + if 'all' in dump or 'iter' in dump: + conf.define('SERD_DEBUG_ITER', 1) + if 'all' in dump or 'search' in dump: + conf.define('SERD_DEBUG_SEARCH', 1) + if 'all' in dump or 'write' in dump: + conf.define('SERD_DEBUG_WRITE', 1) + autowaf.set_lib_env(conf, 'serd', SERD_VERSION) conf.write_config_header('serd_config.h', remove=False) @@ -79,9 +90,13 @@ lib_source = ['src/base64.c', 'src/byte_source.c', 'src/cursor.c', 'src/env.c', + 'src/inserter.c', + 'src/iter.c', + 'src/model.c', 'src/n3.c', 'src/node.c', 'src/nodes.c', + 'src/range.c', 'src/reader.c', 'src/sink.c', 'src/statement.c', @@ -91,6 +106,7 @@ lib_source = ['src/base64.c', 'src/uri.c', 'src/world.c', 'src/writer.c', + 'src/zix/btree.c', 'src/zix/digest.c', 'src/zix/hash.c'] @@ -153,7 +169,8 @@ def build(bld): ('serd_test', 'tests/serd_test.c'), ('read_chunk_test', 'tests/read_chunk_test.c'), ('nodes_test', 'tests/nodes_test.c'), - ('overflow_test', 'tests/overflow_test.c')]: + ('overflow_test', 'tests/overflow_test.c'), + ('model_test', 'tests/model_test.c')]: bld(features = 'c cprogram', source = prog[1], use = 'libserd_profiled', @@ -294,9 +311,17 @@ def show_diff(from_lines, to_lines, from_filename, to_filename): tofile=os.path.abspath(to_filename)): sys.stderr.write(line) -def check_output(out_filename, check_filename, subst_from='', subst_to=''): +def check_output(out_filename, check_filename, subst_from='', subst_to='', sort=False): if not os.access(out_filename, os.F_OK): Logs.pprint('RED', 'FAIL: output %s is missing' % out_filename) + elif sort: + out_lines = sorted(set(io.open(out_filename, encoding='utf-8').readlines())) + check_lines = sorted(set(io.open(check_filename, encoding='utf-8').readlines())) + if out_lines != check_lines: + show_diff(check_lines, out_lines, check_filename, out_filename) + return False + else: + return True elif not file_equals(check_filename, out_filename, subst_from, subst_to): Logs.pprint('RED', 'FAIL: %s != %s' % (os.path.abspath(out_filename), check_filename)) @@ -414,14 +439,17 @@ def test_suite(ctx, base_uri, testdir, report, isyntax, osyntax, options=''): # Check output against test suite check_uri = model[test][mf + 'result'][0] check_path = file_uri_to_path(check_uri) + sort = '-m' in options result = autowaf.run_test( ctx, APPNAME, - lambda: check_output(action + '.out', check_path), + lambda: check_output(action + '.out', check_path, sort=sort), True, name=str(action) + ' check', quiet=True) # Run round-trip tests - test_thru(ctx, uri, action, check_path, - ' '.join(next(thru_options_iter)), isyntax, osyntax, options, quiet=True) + if not sort: + test_thru(ctx, uri, action, check_path, + ' '.join(next(thru_options_iter)), + isyntax, osyntax, options, quiet=True) # Write test report entry if report is not None: @@ -491,7 +519,8 @@ def test(ctx): ['serd_test', 'read_chunk_test', 'nodes_test', - 'overflow_test'], + 'overflow_test', + 'model_test'], name='Unit') def test_syntax_io(in_name, expected_name, lang): @@ -555,7 +584,8 @@ def test(ctx): 'serdi_static "file://%s/tests/good/manifest.ttl" > /dev/full' % srcdir, 1, name='write_error') - run_test_suites(ctx, '') + for opts in ['', '-m']: + run_test_suites(ctx, opts) autowaf.post_test(ctx, APPNAME) -- cgit v1.2.1