From c9fe9fdb61e15b64d03a8da062648ecd3f86d700 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sat, 12 May 2018 13:28:47 +0200 Subject: WIP: Add model --- doc/serdi.1 | 7 + serd/serd.h | 484 ++++++++++++++++++++++++++++++- src/env.c | 8 + src/inserter.c | 106 +++++++ src/inserter.h | 31 ++ src/iter.c | 238 +++++++++++++++ src/iter.h | 88 ++++++ src/log.h | 60 ++++ src/model.c | 651 +++++++++++++++++++++++++++++++++++++++++ src/model.h | 53 ++++ src/node.c | 35 +++ src/node.h | 1 + src/range.c | 248 ++++++++++++++++ src/range.h | 36 +++ src/serdi.c | 39 ++- src/statement.c | 32 ++ src/statement.h | 12 + src/string.c | 1 + src/world.c | 1 + src/writer.c | 2 +- src/zix/btree.c | 40 ++- src/zix/btree.h | 21 ++ tests/model_test.c | 837 +++++++++++++++++++++++++++++++++++++++++++++++++++++ wscript | 44 ++- 24 files changed, 3063 insertions(+), 12 deletions(-) create mode 100644 src/inserter.c create mode 100644 src/inserter.h create mode 100644 src/iter.c create mode 100644 src/iter.h create mode 100644 src/log.h create mode 100644 src/model.c create mode 100644 src/model.h create mode 100644 src/range.c create mode 100644 src/range.h create mode 100644 tests/model_test.c diff --git a/doc/serdi.1 b/doc/serdi.1 index 66d845c6..4b3c9c39 100644 --- a/doc/serdi.1 +++ b/doc/serdi.1 @@ -28,6 +28,13 @@ generated immediately as input arrives, rather than waiting until an entire page of input has arrived. With this option serdi uses one page less memory, but will likely be significantly slower. +.TP +\fB\-f\fR +Fast serialisation without object inlining. This affects only model +(non-streaming) serialisation, and disables searching and statement reordering +for pretty printing. Statements will be written in simple sorted order, which +is faster, but may result in less readable output in Turtle or TriG. + .TP \fB\-h\fR Print the command line options. diff --git a/serd/serd.h b/serd/serd.h index 160db907..29bff936 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -82,6 +82,33 @@ typedef struct SerdStatementImpl SerdStatement; */ typedef struct SerdCursorImpl SerdCursor; +/** + Model. + + A model is an indexed set of statements. It may be searched using various + patterns depending on which indices are enabled. +*/ +typedef struct SerdModelImpl SerdModel; + +/** + Model Inserter. + + An inserter is used for writing statements to a model using the Serd sink + interface. This makes it simple to write to a model directly using a + SerdReader, or any other code that writes statements to a SerdStatementSink. +*/ +typedef struct SerdInserterImpl SerdInserter; + +/** + Model Iterator. +*/ +typedef struct SerdIterImpl SerdIter; + +/** + Model Range. +*/ +typedef struct SerdRangeImpl SerdRange; + /** Environment. @@ -127,6 +154,7 @@ typedef enum { SERD_ERR_UNKNOWN, /**< Unknown error */ SERD_ERR_BAD_SYNTAX, /**< Invalid syntax */ SERD_ERR_BAD_ARG, /**< Invalid argument */ + SERD_ERR_BAD_ITER, /**< Use of invalidated iterator */ SERD_ERR_NOT_FOUND, /**< Not found */ SERD_ERR_ID_CLASH, /**< Encountered clashing blank node IDs */ SERD_ERR_BAD_CURIE, /**< Invalid CURIE (e.g. prefix does not exist) */ @@ -179,6 +207,18 @@ typedef enum { */ typedef uint32_t SerdStatementFlags; +/** + Flags that control style for a model serialisation. +*/ +typedef enum { + SERD_NO_INLINE_OBJECTS = 1 << 0, /**< Do not inline objects where possible */ +} SerdSerialisationFlag; + +/** + Bitwise OR of SerdStatementFlag values. +*/ +typedef uint32_t SerdSerialisationFlags; + /** Type of a syntactic RDF node. @@ -253,6 +293,35 @@ typedef enum { SERD_GRAPH = 3 /**< Graph ("context") */ } SerdField; +/** + Flags for model features. +*/ +typedef enum { + SERD_MODEL_GRAPHS = 1 /**< Support multiple graphs in model. */ +} SerdModelFlag; + +/** + Bitwise OR of SerdModelFlag values. +*/ +typedef uint32_t SerdModelFlags; + +/** + Indexing option. +*/ +typedef enum { + SERD_SPO = 1, /**< Subject, Predicate, Object */ + SERD_SOP = 1 << 1, /**< Subject, Object, Predicate */ + SERD_OPS = 1 << 2, /**< Object, Predicate, Subject */ + SERD_OSP = 1 << 3, /**< Object, Subject, Predicate */ + SERD_PSO = 1 << 4, /**< Predicate, Subject, Object */ + SERD_POS = 1 << 5 /**< Predicate, Object, Subject */ +} SerdIndexOption; + +/** + Bitwise OR of SerdIndexOption values. +*/ +typedef uint32_t SerdIndexOptions; + /** Bitwise OR of SerdNodeFlag values. */ @@ -633,6 +702,16 @@ SERD_API bool serd_node_equals(const SerdNode* a, const SerdNode* b); +/** + Compare two nodes. + + Returns less than, equal to, or greater than zero if `a` is less than, equal + to, or greater than `b`, respectively. +*/ +SERD_API +int +serd_node_compare(const SerdNode* a, const SerdNode* b); + /** Create a new URI from a string. */ @@ -983,6 +1062,13 @@ serd_env_foreach(const SerdEnv* env, SerdPrefixSink func, void* handle); +/** + Send all prefixes in `env` to `sink`. +*/ +SERD_API +void +serd_env_send_prefixes(const SerdEnv* env, SerdSink* sink); + /** @} @name Sink @@ -1233,7 +1319,7 @@ serd_writer_free(SerdWriter* writer); Return a sink interface that emits statements via `writer`. */ SERD_API -const SerdSink* +SerdSink* serd_writer_get_sink(SerdWriter* writer); /** @@ -1362,6 +1448,251 @@ SERD_API void serd_nodes_deref(SerdNodes* nodes, const SerdNode* node); +/** + @} + @name Model + @{ +*/ + +/** + Create a new model. + + @param world The world in which to make this model. + + @param indices Enabled indices, for example `SERD_SPO | SERD_OPS`. Be sure + to enable an index where the most significant node(s) are not variables in + your queries. For example, to make (? P O) queries, enable either SERD_OPS + or SERD_POS). + + @param flags Model options. +*/ +SERD_API +SerdModel* +serd_model_new(SerdWorld* world, SerdIndexOptions indices, SerdModelFlags flags); + +/** + Return a deep copy of `model`. +*/ +SERD_API +SerdModel* +serd_model_copy(const SerdModel* model); + +/** + Return true iff `a` is equal to `b`, ignoring statement cursor metadata. +*/ +SERD_API +bool +serd_model_equals(const SerdModel* a, const SerdModel* b); + +/** + Close and free `model`. +*/ +SERD_API +void +serd_model_free(SerdModel* model); + +/** + Get the world associated with `model`. +*/ +SERD_API +SerdWorld* +serd_model_get_world(SerdModel* model); + +/** + Return the number of statements stored in `model`. +*/ +SERD_API +size_t +serd_model_size(const SerdModel* model); + +/** + Return true iff there are no statements stored in `model`. +*/ +SERD_API +bool +serd_model_empty(const SerdModel* model); + +/** + Return an iterator to the start of `model`. +*/ +SERD_API +SerdIter* +serd_model_begin(const SerdModel* model); + +/** + Return an iterator to the end of `model`. +*/ +SERD_API +const SerdIter* +serd_model_end(const SerdModel* model); + +/** + Return a range of all statements in `model`. +*/ +SERD_API +SerdRange* +serd_model_all(const SerdModel* model); + +/** + Search for statements by a quad pattern. + @return An iterator to the first match, or NULL if no matches found. +*/ +SERD_API +SerdIter* +serd_model_find(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Search for statements by a quad pattern. + @return A range containins all matching statements. +*/ +SERD_API +SerdRange* +serd_model_range(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Search for a single node that matches a pattern. + Exactly one of `s`, `p`, `o` must be NULL. + This function is mainly useful for predicates that only have one value. + @return The first matching node, or NULL if no matches are found. +*/ +SERD_API +const SerdNode* +serd_model_get(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Search for a single statement that matches a pattern. + + This function is mainly useful for predicates that only have one value. + + @return The first matching statement, or NULL if none are found. +*/ +SERD_API +const SerdStatement* +serd_model_get_statement(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Return true iff a statement exists. +*/ +SERD_API +bool +serd_model_ask(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Return the number of matching statements. +*/ +SERD_API +size_t +serd_model_count(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Add a statement to a model from nodes. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_add(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Add a statement to a model. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_insert(SerdModel* model, const SerdStatement* statement); + +/** + Add a range of statements to a model. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_add_range(SerdModel* model, SerdRange* range); + +/** + Remove a quad from a model via an iterator. + + Calling this function invalidates all iterators on `model` except `iter`. + + @param model The model which `iter` points to. + @param iter Iterator to the element to erase, which is incremented to the + next value on return. +*/ +SERD_API +SerdStatus +serd_model_erase(SerdModel* model, SerdIter* iter); + +/** + Remove a range from a model. + + Calling this function invalidates all iterators on `model` except `iter`. + + @param model The model which `range` points to. + @param range Range to erase, which will be empty on return; +*/ +SERD_API +SerdStatus +serd_model_erase_range(SerdModel* model, SerdRange* range); + +/** + @} + @name Inserter + @{ +*/ + +/** + Create an inserter for writing statements to a model. +*/ +SERD_API +SerdInserter* +serd_inserter_new(SerdModel* model, + SerdEnv* env, + const SerdNode* default_graph); + +/** + Free an inserter. +*/ +SERD_API +void +serd_inserter_free(SerdInserter* inserter); + +/** + Return a sink interface that adds statements via `inserter`. +*/ +SERD_API +const SerdSink* +serd_inserter_get_sink(SerdInserter* inserter); + /** @} @name Statement @@ -1410,6 +1741,157 @@ SERD_API const SerdCursor* serd_statement_get_cursor(const SerdStatement* statement); +/** + Return true iff `a` is equal to `b`, ignoring statement cursor metadata. + + Only returns true if nodes are equivalent, does not perform wildcard matching. +*/ +SERD_API +bool +serd_statement_equals(const SerdStatement* a, const SerdStatement* b); + +/** + Return true iff `statement` matches the given pattern. + + The matching rules are the same used for querying: nodes match if they are + equivalent, and NULL acts as a wildcard that matches any node. +*/ +SERD_API +bool +serd_statement_matches(const SerdStatement* statement, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph); + +/** + @} + @name Iterator + @{ +*/ + +/** + Return a new copy of `iter`. +*/ +SERD_API +SerdIter* +serd_iter_copy(const SerdIter* iter); + +/** + Return the statement pointed to by `iter`. +*/ +SERD_API +const SerdStatement* +serd_iter_get(const SerdIter* iter); + +/** + Increment `iter` to point to the next statement. + + @return True iff `iter` has reached the end. +*/ +SERD_API +bool +serd_iter_next(SerdIter* iter); + +/** + Return true iff `lhs` is equal to `rhs`. +*/ +SERD_API +bool +serd_iter_equals(const SerdIter* lhs, const SerdIter* rhs); + +/** + Free `iter`. +*/ +SERD_API +void +serd_iter_free(SerdIter* iter); + +/** + @} + @name Range + @{ +*/ + +/** + Return a new copy of `range`. +*/ +SERD_API +SerdRange* +serd_range_copy(const SerdRange* range); + +/** + Free `range`. +*/ +SERD_API +void +serd_range_free(SerdRange* range); + +/** + Return the first statement in `range`, or NULL if `range` is empty. +*/ +SERD_API +const SerdStatement* +serd_range_front(const SerdRange* range); + +/** + Return true iff `lhs` is equal to `rhs`. +*/ +SERD_API +bool +serd_range_equals(const SerdRange* lhs, const SerdRange* rhs); + +/** + Increment the start of `range` to point to the next statement. +*/ +SERD_API +bool +serd_range_next(SerdRange* range); + +/** + Return the number of statements in `range`. +*/ +SERD_API +size_t +serd_range_size(const SerdRange* range); + +/** + Return true iff there are no statements in `range`. +*/ +SERD_API +bool +serd_range_empty(const SerdRange* range); + +/** + Return an iterator to the start of `range`. +*/ +SERD_API +const SerdIter* +serd_range_begin(const SerdRange* range); + +/** + Return an iterator to the end of `range`. +*/ +SERD_API +const SerdIter* +serd_range_end(const SerdRange* range); + +/** + Write `range` to `sink`. + + The serialisation style can be controlled with `flags`. The default is to + write statements in an order suited for pretty-printing with Turtle or TriG + with as many objects written inline as possible. If + `SERD_NO_INLINE_OBJECTS` is given, a simple sorted stream is written + instead, which is significantly faster since no searching is required, but + can result in ugly output for Turtle or Trig. +*/ +SERD_API +SerdStatus +serd_range_serialise(const SerdRange* range, + const SerdSink* sink, + SerdSerialisationFlags flags); + /** @} @name Cursor diff --git a/src/env.c b/src/env.c index 00585426..e749f050 100644 --- a/src/env.c +++ b/src/env.c @@ -326,3 +326,11 @@ serd_env_foreach(const SerdEnv* env, func(handle, env->prefixes[i].name, env->prefixes[i].uri); } } + +void +serd_env_send_prefixes(const SerdEnv* env, SerdSink* sink) +{ + for (size_t i = 0; i < env->n_prefixes; ++i) { + serd_sink_set_prefix(sink, env->prefixes[i].name, env->prefixes[i].uri); + } +} diff --git a/src/inserter.c b/src/inserter.c new file mode 100644 index 00000000..393e2c20 --- /dev/null +++ b/src/inserter.c @@ -0,0 +1,106 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "inserter.h" + +#include "model.h" +#include "statement.h" +#include "world.h" + +#include + +static SerdStatus +serd_inserter_on_base(SerdInserter* inserter, const SerdNode* uri) +{ + return serd_env_set_base_uri(inserter->env, uri); +} + +static SerdStatus +serd_inserter_on_prefix(SerdInserter* inserter, + const SerdNode* name, + const SerdNode* uri) +{ + return serd_env_set_prefix(inserter->env, name, uri); +} + +static const SerdNode* +manage_or_intern(SerdNodes* nodes, SerdNode* manage, const SerdNode* intern) +{ + return manage ? serd_nodes_manage(nodes, manage) + : serd_nodes_intern(nodes, intern); +} + +static SerdStatus +serd_inserter_on_statement(SerdInserter* inserter, + const SerdStatementFlags flags, + const SerdStatement* statement) +{ + const SerdNode* const subject = serd_statement_get_subject(statement); + const SerdNode* const predicate = serd_statement_get_predicate(statement); + const SerdNode* const object = serd_statement_get_object(statement); + const SerdNode* const graph = serd_statement_get_graph(statement); + + // Attempt to expand all nodes to eliminate CURIEs + SerdNode* const s = serd_env_expand(inserter->env, subject); + SerdNode* const p = serd_env_expand(inserter->env, predicate); + SerdNode* const o = serd_env_expand(inserter->env, object); + SerdNode* const g = serd_env_expand(inserter->env, graph); + + SerdNodes* const nodes = inserter->model->world->nodes; + const SerdNode* model_graph = manage_or_intern(nodes, g, graph); + if (!model_graph) { + model_graph = serd_nodes_intern(nodes, inserter->default_graph); + } + + const SerdStatus st = + serd_model_add_internal(inserter->model, + statement->cursor, + manage_or_intern(nodes, s, subject), + manage_or_intern(nodes, p, predicate), + manage_or_intern(nodes, o, object), + model_graph); + + return st > SERD_FAILURE ? st : SERD_SUCCESS; +} + +SerdInserter* +serd_inserter_new(SerdModel* model, SerdEnv* env, const SerdNode* default_graph) +{ + SerdInserter* inserter = (SerdInserter*)calloc(1, sizeof(SerdInserter)); + inserter->model = model; + inserter->env = env; + inserter->default_graph = serd_node_copy(default_graph); + inserter->iface.handle = inserter; + inserter->iface.base = (SerdBaseSink)serd_inserter_on_base; + inserter->iface.prefix = (SerdPrefixSink)serd_inserter_on_prefix; + inserter->iface.statement = (SerdStatementSink)serd_inserter_on_statement; + return inserter; +} + +void +serd_inserter_free(SerdInserter* inserter) +{ + if (inserter) { + serd_node_free(inserter->default_graph); + free(inserter); + } +} + +const SerdSink* +serd_inserter_get_sink(SerdInserter* inserter) +{ + return &inserter->iface; +} diff --git a/src/inserter.h b/src/inserter.h new file mode 100644 index 00000000..2f3e4239 --- /dev/null +++ b/src/inserter.h @@ -0,0 +1,31 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_INSERTER_H +#define SERD_INSERTER_H + +#include "sink.h" + +#include "serd/serd.h" + +struct SerdInserterImpl { + SerdModel* model; + SerdEnv* env; + SerdNode* default_graph; + SerdSink iface; +}; + +#endif // SERD_INSERTER_H diff --git a/src/iter.c b/src/iter.c new file mode 100644 index 00000000..71df42c1 --- /dev/null +++ b/src/iter.c @@ -0,0 +1,238 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "iter.h" + +#include "log.h" +#include "model.h" +#include "statement.h" +#include "world.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include +#include +#include + +static bool +serd_iter_pattern_matches(const SerdIter* iter, const SerdQuad pat) +{ + const SerdStatement* key = (const SerdStatement*)zix_btree_get(iter->cur); + for (int i = 0; i < iter->n_prefix; ++i) { + const int idx = orderings[iter->order][i]; + if (!serd_node_pattern_match(key->nodes[idx], pat[idx])) { + return false; + } + } + + return true; +} + +/** + Seek forward as necessary until `iter` points at a matching statement. + + @return true iff iterator reached end of valid range. +*/ +static bool +serd_iter_seek_match(SerdIter* iter, const SerdQuad pat) +{ + for (; !zix_btree_iter_is_end(iter->cur); + zix_btree_iter_increment(iter->cur)) { + const SerdStatement* s = (const SerdStatement*)zix_btree_get(iter->cur); + if (serd_statement_matches_quad(s, pat)) { + return false; // Found match + } else if (iter->mode == FILTER_RANGE && + !serd_iter_pattern_matches(iter, pat)) { + zix_btree_iter_free(iter->cur); + iter->cur = NULL; + return true; // Reached end of range + } + } + + assert(zix_btree_iter_is_end(iter->cur)); + return true; // Reached end of index +} + +static bool +check_version(const SerdIter* const iter) +{ + if (iter->version != iter->model->version) { + serd_world_errorf(iter->model->world, + SERD_ERR_BAD_ITER, + "attempt to use invalidated iterator\n"); + return false; + } + return true; +} + +SerdIter* +serd_iter_new(const SerdModel* model, + ZixBTreeIter* cur, + const SerdQuad pat, + SerdOrder order, + SearchMode mode, + int n_prefix) +{ + SerdIter* iter = (SerdIter*)calloc(1, sizeof(SerdIter)); + iter->model = model; + iter->cur = cur; + iter->order = order; + iter->mode = mode; + iter->n_prefix = n_prefix; + iter->version = model->version; + + switch (iter->mode) { + case ALL: + case RANGE: + assert(zix_btree_iter_is_end(iter->cur) || + serd_statement_matches_quad( + ((const SerdStatement*)zix_btree_get(iter->cur)), pat)); + break; + case FILTER_RANGE: + case FILTER_ALL: + serd_iter_seek_match(iter, pat); + break; + } + + // Replace (possibly temporary) nodes in pattern with nodes from the model + if (!zix_btree_iter_is_end(iter->cur)) { + const SerdStatement* s = (const SerdStatement*)zix_btree_get(iter->cur); + for (int i = 0; i < TUP_LEN; ++i) { + if (pat[i]) { + iter->pat[i] = s->nodes[i]; + } + } + } + +#ifdef SERD_DEBUG_ITER + if (!zix_btree_iter_is_end(iter->cur)) { + const SerdStatement* statement = serd_iter_get(iter); + SERD_ITER_LOG("New %p pat=" TUP_FMT " realpat=" TUP_FMT " order=%d prefix=%d mode=%d cur=" TUP_FMT "\n", + (void*)iter, + TUP_FMT_ARGS(pat), + TUP_FMT_ARGS(iter->pat), + iter->order, + iter->n_prefix, + iter->mode, + TUP_FMT_ARGS(statement->nodes)); + } else { + SERD_ITER_LOG("New %p pat=" TUP_FMT " mode=%d at end\n", + (void*)iter, + TUP_FMT_ARGS(pat), + iter->mode); + } +#endif + + return iter; +} + +SerdIter* +serd_iter_copy(const SerdIter* iter) +{ + if (!iter) { + return NULL; + } + + SerdIter* copy = (SerdIter*)malloc(sizeof(SerdIter)); + memcpy(copy, iter, sizeof(SerdIter)); + copy->cur = zix_btree_iter_copy(iter->cur); + return copy; +} + +const SerdStatement* +serd_iter_get(const SerdIter* iter) +{ + return check_version(iter) ? (const SerdStatement*)zix_btree_get(iter->cur) + : NULL; +} + +bool +serd_iter_scan_next(SerdIter* iter) +{ + if (zix_btree_iter_is_end(iter->cur)) { + return true; + } + + switch (iter->mode) { + case ALL: + // At the end if the cursor is (assigned above) + break; + case RANGE: + SERD_ITER_LOG("%p range next\n", (void*)iter); + // At the end if the MSNs no longer match + if (!serd_iter_pattern_matches(iter, iter->pat)) { + zix_btree_iter_free(iter->cur); + iter->cur = NULL; + SERD_ITER_LOG("%p reached non-match end\n", (void*)iter); + } + break; + case FILTER_RANGE: + case FILTER_ALL: + // Seek forward to next match + serd_iter_seek_match(iter, iter->pat); + break; + } + + if (zix_btree_iter_is_end(iter->cur)) { + SERD_ITER_LOG("%p reached end\n", (void*)iter); + return true; + } else { +#ifdef SERD_DEBUG_ITER + SERD_ITER_LOG("%p reached " TUP_FMT "\n", + (void*)iter, + TUP_FMT_ARGS(serd_iter_get(iter)->nodes)); +#endif + return false; + } +} + +bool +serd_iter_next(SerdIter* iter) +{ + if (zix_btree_iter_is_end(iter->cur) || !check_version(iter)) { + return true; + } + + zix_btree_iter_increment(iter->cur); + return serd_iter_scan_next(iter); +} + +bool +serd_iter_equals(const SerdIter* lhs, const SerdIter* rhs) +{ + if (!lhs && !rhs) { + return true; + } + + return (lhs && rhs && lhs->model == rhs->model && + zix_btree_iter_equals(lhs->cur, rhs->cur) && + serd_node_pattern_match(lhs->pat[0], rhs->pat[0]) && + serd_node_pattern_match(lhs->pat[1], rhs->pat[1]) && + serd_node_pattern_match(lhs->pat[2], rhs->pat[2]) && + serd_node_pattern_match(lhs->pat[3], rhs->pat[3]) && + lhs->order == rhs->order && lhs->mode == rhs->mode && + lhs->n_prefix == rhs->n_prefix); +} + +void +serd_iter_free(SerdIter* iter) +{ + if (iter) { + zix_btree_iter_free(iter->cur); + free(iter); + } +} diff --git a/src/iter.h b/src/iter.h new file mode 100644 index 00000000..ac3e4b16 --- /dev/null +++ b/src/iter.h @@ -0,0 +1,88 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_ITER_H +#define SERD_ITER_H + +#include "statement.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include + +/** Triple ordering */ +typedef enum { + SPO, ///< Subject, Predicate, Object + SOP, ///< Subject, Object, Predicate + OPS, ///< Object, Predicate, Subject + OSP, ///< Object, Subject, Predicate + PSO, ///< Predicate, Subject, Object + POS, ///< Predicate, Object, Subject + GSPO, ///< Graph, Subject, Predicate, Object + GSOP, ///< Graph, Subject, Object, Predicate + GOPS, ///< Graph, Object, Predicate, Subject + GOSP, ///< Graph, Object, Subject, Predicate + GPSO, ///< Graph, Predicate, Subject, Object + GPOS ///< Graph, Predicate, Object, Subject +} SerdOrder; + +/** Mode for searching or iteration */ +typedef enum { + ALL, ///< Iterate over entire store + RANGE, ///< Iterate over range with equal prefix + FILTER_RANGE, ///< Iterate over range with equal prefix, filtering + FILTER_ALL ///< Iterate to end of store, filtering +} SearchMode; + +struct SerdIterImpl { + const SerdModel* model; ///< Model being iterated over + ZixBTreeIter* cur; ///< Current DB cursor + SerdQuad pat; ///< Pattern (in ordering order) + SerdOrder order; ///< Store order (which index) + SearchMode mode; ///< Iteration mode + int n_prefix; ///< Prefix for RANGE and FILTER_RANGE + uint64_t version; ///< Model version when iterator was created +}; + +#define NUM_ORDERS 12 +#define TUP_LEN 4 + +/** + Quads of indices for each order, from most to least significant + (array indexed by SordOrder) +*/ +static const int orderings[NUM_ORDERS][TUP_LEN] = { + { 0, 1, 2, 3 }, { 0, 2, 1, 3 }, // SPO, SOP + { 2, 1, 0, 3 }, { 2, 0, 1, 3 }, // OPS, OSP + { 1, 0, 2, 3 }, { 1, 2, 0, 3 }, // PSO, POS + { 3, 0, 1, 2 }, { 3, 0, 2, 1 }, // GSPO, GSOP + { 3, 2, 1, 0 }, { 3, 2, 0, 1 }, // GOPS, GOSP + { 3, 1, 0, 2 }, { 3, 1, 2, 0 } // GPSO, GPOS +}; + +SerdIter* serd_iter_new(const SerdModel* model, + ZixBTreeIter* cur, + const SerdQuad pat, + SerdOrder order, + SearchMode mode, + int n_prefix); + +bool serd_iter_scan_next(SerdIter* iter); + +bool serd_quad_match(const SerdQuad x, const SerdQuad y); + +#endif // SERD_ITER_H diff --git a/src/log.h b/src/log.h new file mode 100644 index 00000000..c60cb326 --- /dev/null +++ b/src/log.h @@ -0,0 +1,60 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_LOG_H +#define SERD_LOG_H + +#ifdef SERD_DEBUG_SEARCH +/** String name of each ordering (array indexed by SordOrder) */ +static const char* const order_names[] = { + "spo", "sop", "ops", "osp", "pso", "pos", + "gspo", "gsop", "gops", "gosp", "gpso", "gpos" +}; +#endif + +#define TUP_FMT "(%s %s %s %s)" +#define TUP_FMT_ELEM(e) ((e) ? serd_node_get_string(e) : "*") +#define TUP_FMT_ARGS(t) \ + TUP_FMT_ELEM((t)[0]), \ + TUP_FMT_ELEM((t)[1]), \ + TUP_FMT_ELEM((t)[2]), \ + TUP_FMT_ELEM((t)[3]) + +#define STATEMENT_FMT_ARGS(s) \ + TUP_FMT_ELEM(serd_statement_get_subject(s)), \ + TUP_FMT_ELEM(serd_statement_get_predicate(s)), \ + TUP_FMT_ELEM(serd_statement_get_object(s)), \ + TUP_FMT_ELEM(serd_statement_get_graph(s)) + +#define SERD_LOG(prefix, ...) fprintf(stderr, "serd." prefix ": " __VA_ARGS__) + +#ifdef SERD_DEBUG_ITER +# define SERD_ITER_LOG(...) SERD_LOG("iter", __VA_ARGS__) +#else +# define SERD_ITER_LOG(...) +#endif +#ifdef SERD_DEBUG_SEARCH +# define SERD_FIND_LOG(...) SERD_LOG("search", __VA_ARGS__) +#else +# define SERD_FIND_LOG(...) +#endif +#ifdef SERD_DEBUG_WRITE +# define SERD_WRITE_LOG(...) SERD_LOG("write", __VA_ARGS__) +#else +# define SERD_WRITE_LOG(...) +#endif + +#endif // SERD_LOG_H diff --git a/src/model.c b/src/model.c new file mode 100644 index 00000000..3bc55c93 --- /dev/null +++ b/src/model.c @@ -0,0 +1,651 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "model.h" + +#include "cursor.h" +#include "iter.h" +#include "log.h" +#include "node.h" +#include "range.h" +#include "statement.h" +#include "world.h" + +#include "zix/btree.h" + +#include +#include +#include +#include + +#define DEFAULT_ORDER SPO +#define DEFAULT_GRAPH_ORDER GSPO + +/** + Compare quads lexicographically, ignoring graph. + + NULL IDs (equal to 0) are treated as wildcards, always less than every + other possible ID, except itself. +*/ +static int +serd_triple_compare(const void* x, const void* y, void* user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (int i = 0; i < TUP_LEN; ++i) { + const int o = ordering[i]; + if (o != SERD_GRAPH) { + const int cmp = + serd_node_wildcard_compare(s->nodes[o], t->nodes[o]); + if (cmp) { + return cmp; + } + } + } + + return 0; +} + +/** + Compare quads lexicographically, with exact (non-wildcard) graph matching. +*/ +static int +serd_quad_compare(const void* x, const void* y, void* user_data) +{ + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + // Compare graph without wildcard matching + const int cmp = serd_node_compare( + s->nodes[SERD_GRAPH], t->nodes[SERD_GRAPH]); + + return cmp ? cmp : serd_triple_compare(x, y, user_data); +} + +/** + Return true iff `serd` has an index for `order`. + If `graphs` is true, `order` will be modified to be the + corresponding order with a G prepended (so G will be the MSN). +*/ +static inline bool +serd_model_has_index(const SerdModel* model, + SerdOrder* order, + int* n_prefix, + bool graphs) +{ + if (graphs) { + *order = (SerdOrder)(*order + GSPO); + *n_prefix += 1; + } + + return model->indices[*order]; +} + +/** + Return the best available index for a pattern. + @param pat Pattern in standard (S P O G) order + @param mode Set to the (best) iteration mode for iterating over results + @param n_prefix Set to the length of the range prefix + (for `mode` == RANGE and `mode` == FILTER_RANGE) +*/ +static SerdOrder +serd_model_best_index(const SerdModel* model, + const SerdQuad pat, + SearchMode* mode, + int* n_prefix) +{ + const bool graph_search = (pat[SERD_GRAPH] != 0); + + const unsigned sig = ((pat[0] ? 1 : 0) * 0x100 + + (pat[1] ? 1 : 0) * 0x010 + + (pat[2] ? 1 : 0) * 0x001); + + SerdOrder good[2] = { (SerdOrder)-1, (SerdOrder)-1 }; + +#define PAT_CASE(sig, m, g0, g1, np) \ + case sig: \ + *mode = m; \ + good[0] = g0; \ + good[1] = g1; \ + *n_prefix = np; \ + break + + // Good orderings that don't require filtering + *mode = RANGE; + *n_prefix = 0; + switch (sig) { + case 0x000: + assert(graph_search); + *mode = RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + case 0x111: + *mode = RANGE; + *n_prefix = graph_search ? 4 : 3; + return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER; + + PAT_CASE(0x001, RANGE, OPS, OSP, 1); + PAT_CASE(0x010, RANGE, POS, PSO, 1); + PAT_CASE(0x011, RANGE, OPS, POS, 2); + PAT_CASE(0x100, RANGE, SPO, SOP, 1); + PAT_CASE(0x101, RANGE, SOP, OSP, 2); + PAT_CASE(0x110, RANGE, SPO, PSO, 2); + } + + if (*mode == RANGE) { + if (serd_model_has_index(model, &good[0], n_prefix, graph_search)) { + return good[0]; + } else if (serd_model_has_index( + model, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + // Not so good orderings that require filtering, but can + // still be constrained to a range + switch (sig) { + PAT_CASE(0x011, FILTER_RANGE, OSP, PSO, 1); + PAT_CASE(0x101, FILTER_RANGE, SPO, OPS, 1); + // SPO is always present, so 0x110 is never reached here + default: break; + } + + if (*mode == FILTER_RANGE) { + if (serd_model_has_index(model, &good[0], n_prefix, graph_search)) { + return good[0]; + } else if (serd_model_has_index( + model, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + if (graph_search) { + *mode = FILTER_RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + } else { + *mode = FILTER_ALL; + return DEFAULT_ORDER; + } +} + +SerdModel* +serd_model_new(SerdWorld* world, SerdIndexOptions indices, SerdModelFlags flags) +{ + SerdModel* model = (SerdModel*)calloc(1, sizeof(struct SerdModelImpl)); + + indices |= SERD_SPO; // SPO index is mandatory + + model->world = world; + model->index_options = indices; + model->flags = flags; + + for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) { + const int* const ordering = orderings[i]; + const int* const g_ordering = orderings[i + (NUM_ORDERS / 2)]; + + if (indices & (1 << i)) { + model->indices[i] = + zix_btree_new((ZixComparator)serd_triple_compare, + (const void*)ordering, + NULL); + if (flags & SERD_MODEL_GRAPHS) { + model->indices[i + (NUM_ORDERS / 2)] = + zix_btree_new((ZixComparator)serd_quad_compare, + (const void*)g_ordering, + NULL); + } + } + } + + // Create end iterator + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + ZixBTreeIter* cur = zix_btree_end(model->indices[order]); + const SerdQuad pat = { 0, 0, 0, 0 }; + + model->end = serd_iter_new(model, cur, pat, order, ALL, 0); + + return model; +} + +SerdModel* +serd_model_copy(const SerdModel* model) +{ + SerdModel* copy = + serd_model_new(model->world, model->index_options, model->flags); + + SerdRange* all = serd_model_all(model); + serd_model_add_range(copy, all); + serd_range_free(all); + + assert(serd_model_size(model) == serd_model_size(copy)); + return copy; +} + +SERD_API +bool +serd_model_equals(const SerdModel* a, const SerdModel* b) +{ + if (!a && !b) { + return true; + } else if (!a || !b || serd_model_size(a) != serd_model_size(b)) { + return false; + } + + SerdRange* ra = serd_model_all(a); + SerdRange* rb = serd_model_all(b); + + bool result = true; + while (!serd_range_empty(ra) && !serd_range_empty(rb)) { + if (!serd_statement_equals(serd_range_front(ra), + serd_range_front(rb))) { + result = false; + break; + } + + serd_range_next(ra); + serd_range_next(rb); + } + + result = result && serd_range_empty(ra) && serd_range_empty(rb); + + serd_range_free(ra); + serd_range_free(rb); + return result; +} + +static void +serd_model_drop_statement(SerdModel* model, SerdStatement* statement) + +{ + for (int i = 0; i < TUP_LEN; ++i) { + if (statement->nodes[i]) { + serd_nodes_deref(model->world->nodes, statement->nodes[i]); + } + } + + if (statement->cursor && statement->cursor->file) { + serd_nodes_deref(model->world->nodes, statement->cursor->file); + } + + free(statement->cursor); + free(statement); +} + +void +serd_model_free(SerdModel* model) +{ + if (!model) { + return; + } + + serd_iter_free(model->end); + + ZixBTree* index = model->indices[model->indices[DEFAULT_GRAPH_ORDER] + ? DEFAULT_GRAPH_ORDER + : DEFAULT_ORDER]; + + // Free statements + ZixBTreeIter* t = zix_btree_begin(index); + for (; !zix_btree_iter_is_end(t); zix_btree_iter_increment(t)) { + SerdStatement* s = (SerdStatement*)zix_btree_get(t); + free(s->cursor); + free(s); + } + zix_btree_iter_free(t); + + // Free indices + for (unsigned o = 0; o < NUM_ORDERS; ++o) { + if (model->indices[o]) { + zix_btree_free(model->indices[o]); + } + } + + free(model); +} + +SerdWorld* +serd_model_get_world(SerdModel* model) +{ + return model->world; +} + +size_t +serd_model_size(const SerdModel* model) +{ + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + return zix_btree_size(model->indices[order]); +} + +bool +serd_model_empty(const SerdModel* model) +{ + return serd_model_size(model) == 0; +} + +SerdIter* +serd_model_begin(const SerdModel* model) +{ + if (serd_model_size(model) == 0) { + return serd_iter_copy(serd_model_end(model)); + } else { + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + ZixBTreeIter* cur = zix_btree_begin(model->indices[order]); + const SerdQuad pat = { 0, 0, 0, 0 }; + return serd_iter_new(model, cur, pat, order, ALL, 0); + } +} + +const SerdIter* +serd_model_end(const SerdModel* model) +{ + return model->end; +} + +SerdRange* +serd_model_all(const SerdModel* model) +{ + return serd_range_new(serd_model_begin(model), + serd_iter_copy(serd_model_end(model))); +} + +SerdIter* +serd_model_find(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + const SerdQuad pat = { s, p, o, g }; + if (!pat[0] && !pat[1] && !pat[2] && !pat[3]) { + return serd_model_begin(model); + } + + SearchMode mode; + int n_prefix; + const SerdOrder index_order = + serd_model_best_index(model, pat, &mode, &n_prefix); + + SERD_FIND_LOG("Find " TUP_FMT " index=%s mode=%d n_prefix=%d\n", + TUP_FMT_ARGS(pat), + order_names[index_order], + mode, + n_prefix); + + ZixBTree* const db = model->indices[index_order]; + ZixBTreeIter* cur = NULL; + + if (mode == FILTER_ALL) { + // No prefix shared with an index at all, linear search (worst case) + SERD_FIND_LOG("warning: Linear model search\n"); + cur = zix_btree_begin(db); + } else if (mode == FILTER_RANGE) { + /* Some prefix, but filtering still required. Build a search pattern + with only the prefix to find the lower bound in log time. */ + SERD_FIND_LOG("warning: Linear range search\n"); + SerdQuad prefix_pat = { NULL, NULL, NULL, NULL }; + const int* const ordering = orderings[index_order]; + for (int i = 0; i < n_prefix; ++i) { + prefix_pat[ordering[i]] = pat[ordering[i]]; + } + zix_btree_lower_bound(db, prefix_pat, &cur); + } else { + // Ideal case, pattern matches an index with no filtering required + zix_btree_lower_bound(db, pat, &cur); + } + + if (zix_btree_iter_is_end(cur)) { + SERD_FIND_LOG("No match found, iterator at end\n"); + zix_btree_iter_free(cur); + return NULL; + } + + const SerdStatement* const key = (const SerdStatement*)zix_btree_get(cur); + if (!key || (mode == RANGE && !serd_statement_matches_quad(key, pat))) { + SERD_FIND_LOG("No match found, cursor at " TUP_FMT "\n", + TUP_FMT_ARGS(key->nodes)); + + zix_btree_iter_free(cur); + return NULL; + } + + return serd_iter_new(model, cur, pat, index_order, mode, n_prefix); +} + +SerdRange* +serd_model_range(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if (!s && !p && !o && !g) { + return serd_range_new(serd_model_begin(model), + serd_iter_copy(serd_model_end(model))); + } + + SerdIter* begin = serd_model_find(model, s, p, o, g); + SerdIter* end = begin ? serd_iter_new(model, + NULL, + begin->pat, + begin->order, + begin->mode, + begin->n_prefix) + : NULL; + return serd_range_new(begin, end); +} + +const SerdNode* +serd_model_get(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + const SerdStatement* statement = + serd_model_get_statement(model, s, p, o, g); + + if (statement) { + if (!s) { + return serd_statement_get_subject(statement); + } else if (!p) { + return serd_statement_get_predicate(statement); + } else if (!o) { + return serd_statement_get_object(statement); + } + } + + return NULL; +} + +const SerdStatement* +serd_model_get_statement(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if ((bool)s + (bool)p + (bool)o != 2) { + return NULL; + } + + SerdIter* const i = serd_model_find(model, s, p, o, g); + if (i && i->cur) { + const SerdStatement* statement = serd_iter_get(i); + serd_iter_free(i); + return statement; + } + + return NULL; +} + +size_t +serd_model_count(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdRange* const range = serd_model_range(model, s, p, o, g); + const size_t count = serd_range_size(range); + serd_range_free(range); + return count; +} + +bool +serd_model_ask(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdIter* iter = serd_model_find(model, s, p, o, g); + bool ret = (iter != NULL); + serd_iter_free(iter); + return ret; +} + +static SerdCursor* +serd_model_intern_cursor(SerdModel* model, const SerdCursor* cursor) +{ + if (cursor) { + SerdCursor* copy = (SerdCursor*)calloc(1, sizeof(SerdCursor)); + + copy->file = serd_nodes_intern(model->world->nodes, cursor->file); + copy->line = cursor->line; + copy->col = cursor->col; + return copy; + } + + return NULL; +} + +SerdStatus +serd_model_add_internal(SerdModel* model, + const SerdCursor* cursor, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdStatement* statement = (SerdStatement*)calloc(1, sizeof(SerdStatement)); + + statement->nodes[0] = s; + statement->nodes[1] = p; + statement->nodes[2] = o; + statement->nodes[3] = g; + statement->cursor = serd_model_intern_cursor(model, cursor); + + SERD_WRITE_LOG("Add " TUP_FMT "\n", TUP_FMT_ARGS(statement->nodes)); + + bool added = false; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (model->indices[i]) { + if (!zix_btree_insert(model->indices[i], statement)) { + added = true; + } else if (i == GSPO) { + break; // Statement already indexed + } + } + } + + ++model->version; + if (added) { + return SERD_SUCCESS; + } + + serd_model_drop_statement(model, statement); + return SERD_FAILURE; +} + +SerdStatus +serd_model_add(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if (!s || !p || !o) { + return serd_world_errorf(model->world, + SERD_ERR_BAD_ARG, + "attempt to add statement with NULL field\n"); + } + + return serd_model_add_internal(model, + NULL, + serd_nodes_intern(model->world->nodes, s), + serd_nodes_intern(model->world->nodes, p), + serd_nodes_intern(model->world->nodes, o), + serd_nodes_intern(model->world->nodes, g)); +} + +SerdStatus +serd_model_insert(SerdModel* model, const SerdStatement* statement) +{ + return serd_model_add_internal(model, + serd_statement_get_cursor(statement), + serd_statement_get_subject(statement), + serd_statement_get_predicate(statement), + serd_statement_get_object(statement), + serd_statement_get_graph(statement)); +} + +SerdStatus +serd_model_add_range(SerdModel* model, SerdRange* range) +{ + SerdStatus st = SERD_SUCCESS; + for (; !st && !serd_range_empty(range); serd_range_next(range)) { + st = serd_model_insert(model, serd_range_front(range)); + } + + return st; +} + +SerdStatus +serd_model_erase(SerdModel* model, SerdIter* iter) +{ + const SerdStatement* statement = serd_iter_get(iter); + + SERD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(statement->nodes)); + + SerdStatement* removed = NULL; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (model->indices[i]) { + zix_btree_remove(model->indices[i], + statement, + (void**)&removed, + i == iter->order ? &iter->cur : NULL); + } + } + serd_iter_scan_next(iter); + + serd_model_drop_statement(model, removed); + iter->version = ++model->version; + + return SERD_SUCCESS; +} + +SerdStatus +serd_model_erase_range(SerdModel* model, SerdRange* range) +{ + while (!serd_range_empty(range)) { + serd_model_erase(model, range->begin); + } + + return SERD_SUCCESS; +} diff --git a/src/model.h b/src/model.h new file mode 100644 index 00000000..23151b63 --- /dev/null +++ b/src/model.h @@ -0,0 +1,53 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_MODEL_H +#define SERD_MODEL_H + +#include "serd/serd.h" +#include "zix/btree.h" + +#include +#include + +#define SERD_NUM_ORDERS 12 + +struct SerdModelImpl +{ + SerdWorld* world; + + SerdIndexOptions index_options; + SerdModelFlags flags; + + /** Index for each possible triple ordering (may or may not exist). + * Each index is a tree of SordQuad with the appropriate ordering. + */ + ZixBTree* indices[SERD_NUM_ORDERS]; + + SerdIter* end; + + uint64_t version; +}; + +SerdStatus +serd_model_add_internal(SerdModel* model, + const SerdCursor* cursor, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +#endif // SERD_MODEL_H diff --git a/src/node.c b/src/node.c index 1f22bf4f..1af22aa9 100644 --- a/src/node.c +++ b/src/node.c @@ -64,6 +64,14 @@ serd_node_pad_size(const size_t n_bytes) return size; } +static const SerdNode* +serd_node_maybe_get_meta_c(const SerdNode* node) +{ + return (node->flags & (SERD_HAS_LANGUAGE | SERD_HAS_DATATYPE)) + ? (node + 1 + (serd_node_pad_size(node->n_bytes) / serd_node_align)) + : NULL; +} + static const SerdNode* serd_node_get_meta_c(const SerdNode* node) { @@ -301,6 +309,33 @@ serd_node_equals(const SerdNode* a, const SerdNode* b) return false; } +int +serd_node_compare(const SerdNode* a, const SerdNode* b) +{ + if (a == b) { + return 0; + } else if (!a || !b) { + return (a < b) ? -1 : (a > b) ? 1 : 0; + } else if (a->type != b->type) { + return a->type - b->type; + } + + const int cmp = strcmp(serd_node_get_string(a), serd_node_get_string(b)); + if (cmp) { + return cmp; + } + + return serd_node_compare(serd_node_maybe_get_meta_c(a), + serd_node_maybe_get_meta_c(b)); +} + +/** Compare nodes, considering NULL a wildcard match. */ +int +serd_node_wildcard_compare(const SerdNode* a, const SerdNode* b) +{ + return (!a || !b) ? 0 : serd_node_compare(a, b); +} + static size_t serd_uri_string_length(const SerdURI* uri) { diff --git a/src/node.h b/src/node.h index c5d0e56b..bc6e6693 100644 --- a/src/node.h +++ b/src/node.h @@ -45,5 +45,6 @@ void serd_node_set(SerdNode** dst, const SerdNode* src); size_t serd_node_total_size(const SerdNode* node); void serd_node_zero_pad(SerdNode* node); SerdNode* serd_new_resolved_uri_i(const char* str, const SerdURI* base); +int serd_node_wildcard_compare(const SerdNode* a, const SerdNode* b); #endif // SERD_NODE_H diff --git a/src/range.c b/src/range.c new file mode 100644 index 00000000..f83855c1 --- /dev/null +++ b/src/range.c @@ -0,0 +1,248 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "range.h" + +#include "iter.h" +#include "log.h" +#include "model.h" +#include "sink.h" +#include "world.h" + +#include +#include + +typedef enum { NAMED, ANON_S, ANON_O, LIST_S, LIST_O } NodeStyle; + +static SerdStatus +write_range_statement(const SerdSink* sink, + const SerdModel* model, + unsigned depth, + SerdStatementFlags flags, + const SerdStatement* statement); + +SerdRange* +serd_range_new(SerdIter* const begin, SerdIter* const end) +{ + SerdRange* range = (SerdRange*)malloc(sizeof(SerdRange)); + + range->begin = begin; + range->end = end; + + return range; +} + +SerdRange* +serd_range_copy(const SerdRange* range) +{ + SerdRange* copy = (SerdRange*)malloc(sizeof(SerdRange)); + + memcpy(copy, range, sizeof(SerdRange)); + copy->begin = serd_iter_copy(range->begin); + copy->end = serd_iter_copy(range->end); + + return copy; +} + +void +serd_range_free(SerdRange* range) +{ + serd_iter_free(range->begin); + serd_iter_free(range->end); + free(range); +} + +const SerdStatement* +serd_range_front(const SerdRange* range) +{ + return serd_iter_get(range->begin); +} + +bool +serd_range_equals(const SerdRange* lhs, const SerdRange* rhs) +{ + return (!lhs && !rhs) || + (lhs && rhs && serd_iter_equals(lhs->begin, rhs->begin) && + serd_iter_equals(lhs->end, rhs->end)); +} + +bool +serd_range_next(SerdRange* range) +{ + return serd_iter_next(range->begin); +} + +size_t +serd_range_size(const SerdRange* range) +{ + SerdRange* r = serd_range_copy(range); + uint64_t n = 0; + for (; !serd_range_empty(r); serd_range_next(r)) { + ++n; + } + serd_range_free(r); + return n; +} + +bool +serd_range_empty(const SerdRange* range) +{ + return serd_iter_equals(range->begin, range->end); +} + +const SerdIter* +serd_range_begin(const SerdRange* range) +{ + return range->begin; +} + +const SerdIter* +serd_range_end(const SerdRange* range) +{ + return range->end; +} + +static NodeStyle +get_node_style(const SerdModel* model, const SerdNode* object) +{ + const size_t num_as_object = + serd_model_count(model, NULL, NULL, object, NULL); + if (serd_node_get_type(object) != SERD_BLANK || num_as_object > 1) { + return NAMED; // Non-blank and/or blank referred to several times + } + + if (serd_model_ask(model, object, model->world->rdf_first, NULL, NULL) && + serd_model_ask(model, object, model->world->rdf_rest, NULL, NULL)) { + return num_as_object == 0 ? LIST_S : LIST_O; + } + + return num_as_object == 0 ? ANON_S : ANON_O; +} + +static SerdStatus +write_range_internal(const SerdSink* sink, + const unsigned depth, + const SerdStatementFlags flags, + SerdRange* range) +{ + SerdStatus st = SERD_SUCCESS; + const SerdModel* model = range && range->begin ? range->begin->model : NULL; + for (; !st && !serd_range_empty(range); serd_range_next(range)) { + st = write_range_statement( + sink, model, depth, 0, serd_range_front(range)); + } + + return st; +} + +static SerdStatus +write_list(const SerdSink* sink, + const SerdModel* model, + const unsigned depth, + const SerdNode* object) +{ + SerdStatus st = SERD_SUCCESS; + const SerdWorld* world = model->world; + const SerdNode* first = world->rdf_first; + const SerdNode* rest = world->rdf_rest; + const SerdNode* nil = world->rdf_nil; + + SerdIter* f = serd_model_find(model, object, first, NULL, NULL); + while (!st && f && !serd_node_equals(object, nil)) { + const SerdStatement* fs = serd_iter_get(f); + st = write_range_statement(sink, model, depth + 1, 0, fs); + serd_iter_free(f); + + SerdIter* const r = serd_model_find(model, object, rest, NULL, NULL); + const SerdNode* next = + r ? serd_statement_get_object(serd_iter_get(r)) : NULL; + + f = next ? serd_model_find(model, next, first, NULL, NULL) : NULL; + if (r && f) { + // This and next node are good, write rdf:next statement + st = serd_sink_write_statement(sink, 0, serd_iter_get(r)); + object = next; + } else { + // Terminate malformed list + st = serd_sink_write_nodes( + sink, 0, object, rest, nil, serd_statement_get_graph(fs)); + f = NULL; + } + + serd_iter_free(r); + } + + return st; +} + +static SerdStatus +write_range_statement(const SerdSink* sink, + const SerdModel* model, + const unsigned depth, + SerdStatementFlags flags, + const SerdStatement* statement) +{ + const SerdNode* subject = serd_statement_get_subject(statement); + const NodeStyle subject_style = get_node_style(model, subject); + + if (depth == 0 && (subject_style == ANON_O || subject_style == LIST_O)) { + return SERD_SUCCESS; // Skip subject that will be inlined somewhere + } else if (subject_style == ANON_S) { + flags |= SERD_EMPTY_S; // Write anonymous subjects in "[] p o" style + } + + const SerdNode* object = serd_statement_get_object(statement); + const NodeStyle object_style = get_node_style(model, object); + SerdStatus st = SERD_SUCCESS; + if (object_style == ANON_O) { + flags |= SERD_ANON_O_BEGIN; + + SerdRange* subrange = serd_model_range(model, object, NULL, NULL, NULL); + st = st ? st : serd_sink_write_statement(sink, flags, statement); + st = st ? st : write_range_internal(sink, depth + 1, 0, subrange); + st = st ? st : serd_sink_end(sink, object); + serd_range_free(subrange); + + return st; + } else if (object_style == LIST_O) { + flags |= SERD_LIST_O_BEGIN; + st = st ? st : serd_sink_write_statement(sink, flags, statement); + st = st ? st : write_list(sink, model, depth + 1, object); + return st; + } + + return serd_sink_write_statement(sink, flags, statement); +} + +SerdStatus +serd_range_serialise(const SerdRange* range, + const SerdSink* sink, + const SerdSerialisationFlags flags) +{ + SerdStatus st = SERD_SUCCESS; + SerdRange* const copy = serd_range_copy(range); + + if (flags & SERD_NO_INLINE_OBJECTS) { + for (; !st && !serd_range_empty(copy); serd_range_next(copy)) { + st = sink->statement(sink->handle, 0, serd_range_front(copy)); + } + } else { + st = write_range_internal(sink, 0, 0, copy); + } + + serd_range_free(copy); + return st; +} diff --git a/src/range.h b/src/range.h new file mode 100644 index 00000000..d07741ef --- /dev/null +++ b/src/range.h @@ -0,0 +1,36 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_RANGE_H +#define SERD_RANGE_H + +#include "iter.h" +#include "statement.h" + +#include "serd/serd.h" + +struct SerdRangeImpl { + SerdIter* begin; ///< Iterator to first statement in range + SerdIter* end; ///< Iterator to end of range +}; + +SerdRange* +serd_range_new(SerdIter* begin, SerdIter* end); + +bool +serd_range_scan_next(SerdRange* range); + +#endif // SERD_RANGE_H diff --git a/src/serdi.c b/src/serdi.c index 6259a6bf..2bef5e4a 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -51,10 +51,12 @@ print_usage(const char* name, bool error) fprintf(os, " -b Fast bulk output for large serialisations.\n"); fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); fprintf(os, " -e Eat input one character at a time.\n"); + fprintf(os, " -f Fast serialisation without object inlining\n"); fprintf(os, " -h Display this help and exit.\n"); fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); fprintf(os, " -k BYTES Parser stack size.\n"); fprintf(os, " -l Lax (non-strict) parsing.\n"); + fprintf(os, " -m Build and serialise a model (no streaming).\n"); fprintf(os, " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"); fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); fprintf(os, " -q Suppress all output except data.\n"); @@ -91,7 +93,9 @@ main(int argc, char** argv) bool ascii = false; bool bulk_read = true; bool bulk_write = false; + bool no_inline = false; bool lax = false; + bool use_model = false; bool quiet = false; long stack_size = 4194304; const char* add_prefix = NULL; @@ -108,10 +112,14 @@ main(int argc, char** argv) bulk_write = true; } else if (argv[a][1] == 'e') { bulk_read = false; + } else if (argv[a][1] == 'f') { + no_inline = true; } else if (argv[a][1] == 'h') { return print_usage(argv[0], false); } else if (argv[a][1] == 'l') { lax = true; + } else if (argv[a][1] == 'm') { + use_model = true; } else if (argv[a][1] == 'q') { quiet = true; } else if (argv[a][1] == 'v') { @@ -191,6 +199,9 @@ main(int argc, char** argv) const SerdStyleFlags output_style = (ascii ? SERD_STYLE_ASCII : 0); + const SerdSerialisationFlags serialisation_flags = + no_inline ? SERD_NO_INLINE_OBJECTS : 0; + SerdByteSink* byte_sink = serd_byte_sink_new( (SerdWriteFunc)fwrite, out_fd, bulk_write ? 4096 : 1); @@ -201,9 +212,20 @@ main(int argc, char** argv) (SerdWriteFunc)serd_byte_sink_write, byte_sink); - SerdReader* reader = serd_reader_new( - world, input_syntax, serd_writer_get_sink(writer), stack_size); + SerdReader* reader = NULL; + SerdModel* model = NULL; + SerdInserter* inserter = NULL; + const SerdSink* sink = NULL; + if (use_model) { + const SerdIndexOptions indices = SERD_SPO | (no_inline ? 0 : SERD_OPS); + model = serd_model_new(world, indices, input_has_graphs); + inserter = serd_inserter_new(model, env, NULL); + sink = serd_inserter_get_sink(inserter); + } else { + sink = serd_writer_get_sink(writer); + } + reader = serd_reader_new(world, input_syntax, sink, stack_size); serd_reader_set_strict(reader, !lax); if (quiet) { serd_world_set_error_sink(world, quiet_error_sink, NULL); @@ -239,6 +261,19 @@ main(int argc, char** argv) } serd_reader_finish(reader); + + if (status <= SERD_FAILURE && use_model) { + SerdSink* wsink = serd_writer_get_sink(writer); + serd_env_send_prefixes(env, wsink); + + SerdRange* range = serd_model_all(model); + status = serd_range_serialise(range, wsink, serialisation_flags); + serd_range_free(range); + } + + serd_node_free(input_name); + serd_inserter_free(inserter); + serd_model_free(model); serd_reader_free(reader); serd_writer_free(writer); serd_byte_sink_free(byte_sink); diff --git a/src/statement.c b/src/statement.c index 24440a38..347cfe78 100644 --- a/src/statement.c +++ b/src/statement.c @@ -51,3 +51,35 @@ serd_statement_get_cursor(const SerdStatement* statement) { return statement->cursor; } + +bool +serd_statement_equals(const SerdStatement* a, const SerdStatement* b) +{ + return (serd_node_equals(a->nodes[0], b->nodes[0]) && + serd_node_equals(a->nodes[1], b->nodes[1]) && + serd_node_equals(a->nodes[2], b->nodes[2]) && + serd_node_equals(a->nodes[3], b->nodes[3])); +} + +bool +serd_statement_matches(const SerdStatement* statement, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph) + +{ + return (serd_node_pattern_match(statement->nodes[0], subject) && + serd_node_pattern_match(statement->nodes[1], predicate) && + serd_node_pattern_match(statement->nodes[2], object) && + serd_node_pattern_match(statement->nodes[3], graph)); +} + +bool +serd_statement_matches_quad(const SerdStatement* statement, const SerdQuad quad) +{ + return serd_node_pattern_match(statement->nodes[0], quad[0]) && + serd_node_pattern_match(statement->nodes[1], quad[1]) && + serd_node_pattern_match(statement->nodes[2], quad[2]) && + serd_node_pattern_match(statement->nodes[3], quad[3]); +} diff --git a/src/statement.h b/src/statement.h index 7c8384e9..4a1056f2 100644 --- a/src/statement.h +++ b/src/statement.h @@ -19,6 +19,8 @@ #include "serd/serd.h" +#include + /** Quad of nodes (a statement), or a quad pattern. @@ -31,4 +33,14 @@ struct SerdStatementImpl { SerdCursor* cursor; }; +static inline bool +serd_node_pattern_match(const SerdNode* a, const SerdNode* b) +{ + return !a || !b || (a == b) || serd_node_equals(a, b); +} + +bool +serd_statement_matches_quad(const SerdStatement* statement, + const SerdQuad quad); + #endif // SERD_STATEMENT_H diff --git a/src/string.c b/src/string.c index f4df39ff..a1d3a0b5 100644 --- a/src/string.c +++ b/src/string.c @@ -38,6 +38,7 @@ serd_strerror(SerdStatus status) case SERD_ERR_UNKNOWN: return "Unknown error"; case SERD_ERR_BAD_SYNTAX: return "Invalid syntax"; case SERD_ERR_BAD_ARG: return "Invalid argument"; + case SERD_ERR_BAD_ITER: return "Invalid iterator"; case SERD_ERR_NOT_FOUND: return "Not found"; case SERD_ERR_ID_CLASH: return "Blank node ID clash"; case SERD_ERR_BAD_CURIE: return "Invalid CURIE"; diff --git a/src/world.c b/src/world.c index 7baeb4bc..52ef1bce 100644 --- a/src/world.c +++ b/src/world.c @@ -104,6 +104,7 @@ void serd_world_free(SerdWorld* world) { serd_node_free(world->blank_node); + serd_nodes_free(world->nodes); free(world); } diff --git a/src/writer.c b/src/writer.c index 24f00429..446bcb18 100644 --- a/src/writer.c +++ b/src/writer.c @@ -1014,7 +1014,7 @@ serd_writer_free(SerdWriter* writer) free(writer); } -const SerdSink* +SerdSink* serd_writer_get_sink(SerdWriter* writer) { return &writer->iface; diff --git a/src/zix/btree.c b/src/zix/btree.c index 7521086d..a55c4699 100644 --- a/src/zix/btree.c +++ b/src/zix/btree.c @@ -55,6 +55,7 @@ typedef struct { } ZixBTreeIterFrame; struct ZixBTreeIterImpl { + unsigned n_levels; ///< Maximum depth of stack unsigned level; ///< Current level in stack ZixBTreeIterFrame stack[]; ///< Position stack }; @@ -350,7 +351,9 @@ zix_btree_iter_new(const ZixBTree* const t) { const size_t s = t->height * sizeof(ZixBTreeIterFrame); - return (ZixBTreeIter*)calloc(1, sizeof(ZixBTreeIter) + s); + ZixBTreeIter* i = (ZixBTreeIter*)calloc(1, sizeof(ZixBTreeIter) + s); + i->n_levels = t->height; + return i; } ZIX_PRIVATE void @@ -712,12 +715,47 @@ zix_btree_begin(const ZixBTree* const t) return i; } +ZIX_API ZixBTreeIter* +zix_btree_end(const ZixBTree* const t) +{ + return zix_btree_iter_new(t); +} + +ZIX_API ZixBTreeIter* +zix_btree_iter_copy(const ZixBTreeIter* const i) +{ + if (!i) { + return NULL; + } + + const size_t s = i->n_levels * sizeof(ZixBTreeIterFrame); + + ZixBTreeIter* j = (ZixBTreeIter*)malloc(sizeof(ZixBTreeIter) + s); + memcpy(j, i, sizeof(ZixBTreeIter) + s); + return j; +} + ZIX_API bool zix_btree_iter_is_end(const ZixBTreeIter* const i) { return !i || i->stack[0].node == NULL; } +ZIX_API bool +zix_btree_iter_equals(const ZixBTreeIter* const lhs, const ZixBTreeIter* const rhs) +{ + if (zix_btree_iter_is_end(lhs) && zix_btree_iter_is_end(rhs)) { + return true; + } else if (!lhs || !rhs || lhs->n_levels != rhs->n_levels) { + return false; + } + + return !memcmp(lhs, + rhs, + sizeof(ZixBTreeIter) + + lhs->n_levels * sizeof(ZixBTreeIterFrame)); +} + ZIX_API void zix_btree_iter_increment(ZixBTreeIter* const i) { diff --git a/src/zix/btree.h b/src/zix/btree.h index 46daa24a..0f71d1e3 100644 --- a/src/zix/btree.h +++ b/src/zix/btree.h @@ -125,6 +125,27 @@ zix_btree_get(const ZixBTreeIter* ti); ZIX_API ZixBTreeIter* zix_btree_begin(const ZixBTree* t); +/** + Return an iterator to the end of `t` (one past the last element). + + The returned iterator must be freed with zix_btree_iter_free(). +*/ +ZIX_API ZixBTreeIter* +zix_btree_end(const ZixBTree* t); + +/** + Return a new copy of `i`. +*/ +ZIX_API ZixBTreeIter* +zix_btree_iter_copy(const ZixBTreeIter* const i); + +/** + Return true iff `lhs` is equal to `rhs`. +*/ +ZIX_API bool +zix_btree_iter_equals(const ZixBTreeIter* const lhs, + const ZixBTreeIter* const rhs); + /** Return true iff `i` is an iterator to the end of its tree. */ diff --git a/tests/model_test.c b/tests/model_test.c new file mode 100644 index 00000000..395da7c3 --- /dev/null +++ b/tests/model_test.c @@ -0,0 +1,837 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include +#include +#include +#include + +#include "serd/serd.h" + +#include "../src/log.h" +#include "test_utils.h" + +#define WILDCARD_NODE NULL + +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define RDF_FIRST NS_RDF "first" +#define RDF_REST NS_RDF "rest" + +static const unsigned n_objects_per = 2; + +typedef const SerdNode* Quad[4]; + +typedef struct +{ + Quad query; + int expected_num_results; +} QueryTest; + +static SerdNode* +uri(SerdWorld* world, int num) +{ + char str[] = "eg:000"; + snprintf(str + 3, 4, "%03d", num); + return serd_new_uri(str); +} + +static int +generate(SerdWorld* world, SerdModel* model, size_t n_quads, SerdNode* graph) +{ + for (size_t i = 0; i < n_quads; ++i) { + int num = (i * n_objects_per) + 1; + + SerdNode* ids[2 + n_objects_per]; + for (unsigned j = 0; j < 2 + n_objects_per; ++j) { + ids[j] = uri(world, num++); + } + + for (unsigned j = 0; j < n_objects_per; ++j) { + if (serd_model_add(model, ids[0], ids[1], ids[2 + j], graph)) { + FAIL("Failed to add quad\n"); + } + } + + for (unsigned j = 0; j < 2 + n_objects_per; ++j) { + serd_node_free(ids[j]); + } + } + + // Add some literals + + // (98 4 "hello") and (98 4 "hello"^^<5>) + SerdNode* hello = serd_new_string("hello"); + SerdNode* hello_gb = serd_new_plain_literal("hello", "en-gb"); + SerdNode* hello_us = serd_new_plain_literal("hello", "en-us"); + SerdNode* hello_t4 = serd_new_typed_literal("hello", uri(world, 4)); + SerdNode* hello_t5 = serd_new_typed_literal("hello", uri(world, 5)); + if (serd_model_add(model, uri(world, 98), uri(world, 4), hello, graph)) { + FAIL("Failed to add untyped literal\n"); + } + if (serd_model_add(model, uri(world, 98), uri(world, 4), hello_t5, graph)) { + FAIL("Failed to add typed literal\n"); + } + + // (96 4 "hello"^^<4>) and (96 4 "hello"^^<5>) + if (serd_model_add(model, uri(world, 96), uri(world, 4), hello_t4, graph)) { + FAIL("Failed to add typed literal\n"); + } + if (serd_model_add(model, uri(world, 96), uri(world, 4), hello_t5, graph)) { + FAIL("Failed to add typed literal\n"); + } + + // (94 5 "hello") and (94 5 "hello"@en-gb) + if (serd_model_add(model, uri(world, 94), uri(world, 5), hello, graph)) { + FAIL("Failed to add literal\n"); + } + if (serd_model_add(model, uri(world, 94), uri(world, 5), hello_gb, graph)) { + FAIL("Failed to add literal with language\n"); + } + + // (92 6 "hello"@en-us) and (92 6 "hello"@en-gb) + if (serd_model_add(model, uri(world, 92), uri(world, 6), hello_us, graph)) { + FAIL("Failed to add literal with language\n"); + } + if (serd_model_add(model, uri(world, 92), uri(world, 6), hello_gb, graph)) { + FAIL("Failed to add literal with language\n"); + } + + // (14 6 "bonjour"@fr) and (14 6 "salut"@fr) + SerdNode* bonjour = serd_new_plain_literal("bonjour", "fr"); + SerdNode* salut = serd_new_plain_literal("salut", "fr"); + if (serd_model_add(model, uri(world, 14), uri(world, 6), bonjour, graph)) { + FAIL("Failed to add literal with language\n"); + } + if (serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)) { + FAIL("Failed to add literal with language\n"); + } + + // Attempt to add duplicates + if (!serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)) { + FAIL("Successfully added duplicate statement\n"); + } + + // Add a blank node subject + SerdNode* ablank = serd_new_blank("ablank"); + if (serd_model_add(model, ablank, uri(world, 6), salut, graph)) { + FAIL("Failed to add blank subject statement\n"); + } + + // Add statement with URI object + if (serd_model_add(model, ablank, uri(world, 6), uri(world, 7), graph)) { + FAIL("Failed to add URI subject statement\n"); + } + + return EXIT_SUCCESS; +} + +static int +test_read(SerdWorld* world, SerdModel* model, SerdNode* g, const size_t n_quads) +{ + SerdIter* iter = serd_model_begin(model); + for (; !serd_iter_equals(iter, serd_model_end(model)); + serd_iter_next(iter)) { + const SerdStatement* statement = serd_iter_get(iter); + if (!serd_statement_get_subject(statement) || + !serd_statement_get_predicate(statement) || + !serd_statement_get_object(statement)) { + FAIL("Iterator points to null node\n"); + } + } + + // Attempt to increment past end + if (!serd_iter_next(iter)) { + FAIL("Successfully incremented past end\n"); + } + + serd_iter_free(iter); + + const char* s = "hello"; + SerdNode* plain_hello = serd_new_string(s); + SerdNode* type4_hello = serd_new_typed_literal(s, uri(world, 4)); + SerdNode* type5_hello = serd_new_typed_literal(s, uri(world, 5)); + SerdNode* gb_hello = serd_new_plain_literal(s, "en-gb"); + SerdNode* us_hello = serd_new_plain_literal(s, "en-us"); + +#define NUM_PATTERNS 18 + + QueryTest patterns[NUM_PATTERNS] = { + { { NULL, NULL, NULL }, (int)(n_quads * n_objects_per) + 12 }, + { { uri(world, 1), WILDCARD_NODE, WILDCARD_NODE }, 2 }, + { { uri(world, 9), uri(world, 9), uri(world, 9) }, 0 }, + { { uri(world, 1), uri(world, 2), uri(world, 4) }, 1 }, + { { uri(world, 3), uri(world, 4), WILDCARD_NODE }, 2 }, + { { WILDCARD_NODE, uri(world, 2), uri(world, 4) }, 1 }, + { { WILDCARD_NODE, WILDCARD_NODE, uri(world, 4) }, 1 }, + { { uri(world, 1), WILDCARD_NODE, WILDCARD_NODE }, 2 }, + { { uri(world, 1), WILDCARD_NODE, uri(world, 4) }, 1 }, + { { WILDCARD_NODE, uri(world, 2), WILDCARD_NODE }, 2 }, + { { uri(world, 98), uri(world, 4), plain_hello }, 1 }, + { { uri(world, 98), uri(world, 4), type5_hello }, 1 }, + { { uri(world, 96), uri(world, 4), type4_hello }, 1 }, + { { uri(world, 96), uri(world, 4), type5_hello }, 1 }, + { { uri(world, 94), uri(world, 5), plain_hello }, 1 }, + { { uri(world, 94), uri(world, 5), gb_hello }, 1 }, + { { uri(world, 92), uri(world, 6), gb_hello }, 1 }, + { { uri(world, 92), uri(world, 6), us_hello }, 1 } + }; + + Quad match = { uri(world, 1), uri(world, 2), uri(world, 4), g }; + if (!serd_model_ask(model, match[0], match[1], match[2], match[3])) { + FAILF("No match for " TUP_FMT "\n", TUP_FMT_ARGS(match)); + } + + Quad nomatch = { uri(world, 1), uri(world, 2), uri(world, 9), g }; + if (serd_model_ask(model, nomatch[0], nomatch[1], nomatch[2], nomatch[3])) { + FAILF("False match for " TUP_FMT "\n", TUP_FMT_ARGS(nomatch)); + } + + if (serd_model_get(model, NULL, NULL, uri(world, 3), g)) { + FAIL("Get *,*,3 succeeded\n"); + } else if (serd_model_get(model, uri(world, 1), uri(world, 99), NULL, g)) { + FAIL("Get 1,2,9 succeeded\n"); + } else if (!serd_node_equals( + serd_model_get( + model, uri(world, 1), uri(world, 2), NULL, g), + uri(world, 3))) { + FAIL("Get 1,2,* != 3\n"); + } else if (!serd_node_equals( + serd_model_get( + model, uri(world, 1), NULL, uri(world, 3), g), + uri(world, 2))) { + FAIL("Get 1,*,3 != 2\n"); + } else if (!serd_node_equals( + serd_model_get( + model, NULL, uri(world, 2), uri(world, 3), g), + uri(world, 1))) { + FAIL("Get *,2,3 != 1\n"); + } + + for (unsigned i = 0; i < NUM_PATTERNS; ++i) { + QueryTest test = patterns[i]; + Quad pat = { test.query[0], test.query[1], test.query[2], g }; + + SerdRange* range = serd_model_range(model, pat[0], pat[1], pat[2], pat[3]); + int num_results = 0; + for (; !serd_range_empty(range); serd_range_next(range)) { + ++num_results; + if (!serd_statement_matches(serd_range_front(range), + pat[0], + pat[1], + pat[2], + pat[3])) { + serd_iter_free(iter); + FAILF("Query result " TUP_FMT " does not match pattern " TUP_FMT + "\n", + STATEMENT_FMT_ARGS(serd_range_front(range)), + TUP_FMT_ARGS(pat)); + } + } + serd_range_free(range); + + if (num_results != test.expected_num_results) { + FAILF("Expected %d results for " TUP_FMT ", got %d\n", + test.expected_num_results, + TUP_FMT_ARGS(pat), + num_results); + } + } + + // Query blank node subject + Quad pat = { serd_new_blank("ablank"), 0, 0 }; + int num_results = 0; + SerdRange* range = serd_model_range(model, pat[0], pat[1], pat[2], pat[3]); + for (; !serd_range_empty(range); serd_range_next(range)) { + ++num_results; + const SerdStatement* statement = serd_range_front(range); + if (!serd_statement_matches( + statement, pat[0], pat[1], pat[2], pat[3])) { + serd_range_free(range); + FAILF("Result for " TUP_FMT " does not match pattern\n", + TUP_FMT_ARGS(pat)); + } + } + serd_range_free(range); + + if (num_results != 2) { + FAIL("Blank node subject query failed\n"); + } + + // Test nested queries + const SerdNode* last_subject = 0; + range = serd_model_range(model, NULL, NULL, NULL, NULL); + for (; !serd_range_empty(range); serd_range_next(range)) { + const SerdStatement* statement = serd_range_front(range); + const SerdNode* subject = serd_statement_get_subject(statement); + if (subject == last_subject) { + continue; + } + + Quad subpat = { subject, 0, 0 }; + SerdRange* subrange = serd_model_range( + model, subpat[0], subpat[1], subpat[2], subpat[3]); + const SerdStatement* substatement = serd_range_front(subrange); + uint64_t num_sub_results = 0; + if (serd_statement_get_subject(substatement) != subject) { + FAIL("Incorrect initial submatch\n"); + } + for (; !serd_range_empty(subrange); serd_range_next(subrange)) { + if (!serd_statement_matches(serd_range_front(subrange), + subpat[0], + subpat[1], + subpat[2], + subpat[3])) { + serd_range_free(range); + serd_range_free(subrange); + FAIL("Nested query result does not match pattern\n"); + } + ++num_sub_results; + } + serd_range_free(subrange); + if (num_sub_results != n_objects_per) { + FAILF("Nested query " TUP_FMT " failed" + " (%d results, expected %d)\n", + TUP_FMT_ARGS(subpat), + num_sub_results, + n_objects_per); + } + + uint64_t count = serd_model_count(model, subject, 0, 0, 0); + if (count != num_sub_results) { + FAILF("Query " TUP_FMT " count %d does not match result count %d\n", + TUP_FMT_ARGS(subpat), + count, + num_sub_results); + } + + last_subject = subject; + } + serd_range_free(range); + + return 0; +} + +static SerdStatus +unexpected_error(void* handle, const SerdError* error) +{ + fprintf(stderr, "error: "); + vfprintf(stderr, error->fmt, *error->args); + return SERD_SUCCESS; +} + +static SerdStatus +expected_error(void* handle, const SerdError* error) +{ + fprintf(stderr, "expected: "); + vfprintf(stderr, error->fmt, *error->args); + return SERD_SUCCESS; +} + +static int +test_free_null(SerdWorld* world, const size_t n_quads) +{ + serd_model_free(NULL); // Shouldn't crash + return 0; +} + +static int +test_get_world(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + if (serd_model_get_world(model) != world) { + FAIL("Model has incorrect world pointer\n"); + } + + serd_model_free(model); + return 0; +} + +static int +test_all_begin(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + SerdRange* all = serd_model_all(model); + SerdIter* begin = serd_model_find(model, NULL, NULL, NULL, NULL); + if (!serd_iter_equals(serd_range_begin(all), begin)) { + FAIL("Model range does not start with begin iterator\n"); + } + + serd_range_free(all); + serd_iter_free(begin); + serd_model_free(model); + return 0; +} + +static int +test_add_null(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + + serd_world_set_error_sink(world, expected_error, NULL); + if (!serd_model_add(model, 0, 0, 0, 0)) { + FAIL("Added NULL tuple\n"); + } else if (!serd_model_add(model, uri(world, 1), 0, 0, 0)) { + FAIL("Added tuple with NULL P and O\n"); + } else if (!serd_model_add(model, uri(world, 1), uri(world, 2), 0, 0)) { + FAIL("Added tuple with NULL O\n"); + } else if (!serd_model_empty(model)) { + FAIL("Model contains invalid statements\n"); + } + + serd_model_free(model); + return 0; +} + +static int +test_add_with_iterator(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + + serd_world_set_error_sink(world, expected_error, NULL); + if (serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0)) { + FAIL("Failed to add statement\n"); + } + + // Add a statement with an active iterator + SerdIter* iter = serd_model_begin(model); + if (serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 4), 0)) { + FAIL("Failed to add statement with active iterator\n"); + } + + // Check that iterator has been invalidated + if (serd_iter_get(iter)) { + FAIL("Successfully dereferenced invalidated iterator\n"); + } else if (!serd_iter_next(iter)) { + FAIL("Successfully incremented invalidated iterator\n"); + } + + serd_iter_free(iter); + serd_model_free(model); + return 0; +} + +static int +test_erase_with_iterator(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + + serd_world_set_error_sink(world, expected_error, NULL); + if (serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0) || + serd_model_add(model, uri(world, 4), uri(world, 5), uri(world, 6), 0)) { + FAIL("Failed to add statements\n"); + } + + // Erase a statement with an active iterator + SerdIter* iter1 = serd_model_begin(model); + SerdIter* iter2 = serd_model_begin(model); + if (serd_model_erase(model, iter1)) { + FAIL("Failed to erase statement\n"); + } + + // Check that erased iterator points to the next statement + if (!serd_statement_matches(serd_iter_get(iter1), + uri(world, 4), + uri(world, 5), + uri(world, 6), + 0)) { + FAIL("Erased iterator was not incremented\n"); + } + + // Check that other iterator has been invalidated + if (serd_iter_get(iter2)) { + FAIL("Successfully dereferenced invalidated iterator\n"); + } else if (!serd_iter_next(iter2)) { + FAIL("Successfully incremented invalidated iterator\n"); + } + + serd_iter_free(iter2); + serd_iter_free(iter1); + serd_model_free(model); + return 0; +} + +static int +test_add_erase(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, true); + + // Add (s p "hello") + SerdNode* s = uri(world, 1); + SerdNode* p = uri(world, 2); + SerdNode* hello = serd_new_string("hello"); + if (serd_model_add(model, s, p, hello, 0)) { + FAIL("Failed to add statement (s p \"hello\")\n"); + } else if (!serd_model_ask(model, s, p, hello, 0)) { + FAIL("Added statement (s p \"hello\") not found\n"); + } + + // Add (s p "hi") + SerdNode* hi = serd_new_string("hi"); + if (serd_model_add(model, s, p, hi, NULL)) { + FAIL("Failed to add statement (s p \"hi\")\n"); + } else if (!serd_model_ask(model, s, p, hi, 0)) { + FAIL("Added statement (s p \"hi\") not found\n"); + } + + // Erase (s p "hi") + SerdIter* iter = serd_model_find(model, s, p, hi, NULL); + if (serd_model_erase(model, iter)) { + FAIL("Failed to erase statement (s p \"hi\")\n"); + } else if (serd_model_size(model) != 1) { + FAILF("Expected 1 node after erasure, not %zu\n", + serd_model_size(model)); + } + serd_iter_free(iter); + + // Check that erased statement can not be found + SerdRange* empty = serd_model_range(model, s, p, hi, NULL); + if (!serd_range_empty(empty)) { + FAIL("Found removed statement (s p \"hi\")\n"); + } + serd_range_free(empty); + + serd_model_free(model); + return 0; +} + +static int +test_erase_all(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + generate(world, model, n_quads, NULL); + + SerdIter* iter = serd_model_begin(model); + while (!serd_iter_equals(iter, serd_model_end(model))) { + const SerdStatus st = serd_model_erase(model, iter); + if (st) { + FAILF("Failed to erase tuple via iterator (%s)\n", + serd_strerror(st)); + } + } + + serd_iter_free(iter); + serd_model_free(model); + return 0; +} + +static int +test_copy(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + generate(world, model, n_quads, NULL); + + SerdModel* copy = serd_model_copy(model); + if (!serd_model_equals(model, copy)) { + FAIL("Model copy does not equal original\n"); + } + + serd_model_free(model); + serd_model_free(copy); + return 0; +} + +static int +test_equals(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + generate(world, model, n_quads, NULL); + serd_model_add( + model, uri(world, 0), uri(world, 1), uri(world, 2), uri(world, 3)); + + CHECK(serd_model_equals(NULL, NULL)); + CHECK(!serd_model_equals(NULL, model)); + CHECK(!serd_model_equals(model, NULL)); + + SerdModel* empty = serd_model_new(world, SERD_SPO, false); + if (serd_model_equals(model, empty)) { + FAIL("Non-empty model equals empty model\n"); + } + + SerdModel* different = serd_model_new(world, SERD_SPO, false); + generate(world, different, n_quads, NULL); + serd_model_add( + different, uri(world, 1), uri(world, 1), uri(world, 2), uri(world, 3)); + + if (serd_model_size(model) != serd_model_size(different)) { + FAIL("Models with same number of statements report different size\n"); + } else if (serd_model_equals(model, different)) { + FAIL("Models with different statements are equal\n"); + } + + serd_model_free(model); + serd_model_free(empty); + serd_model_free(different); + return 0; +} + +static int +test_find_past_end(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + SerdNode* s = uri(world, 1); + SerdNode* p = uri(world, 2); + SerdNode* o = uri(world, 3); + if (serd_model_add(model, s, p, o, 0)) { + FAIL("Failed to add statement\n"); + } else if (!serd_model_ask(model, s, p, o, 0)) { + FAIL("Added statement not found\n"); + } + + SerdNode* huge = uri(world, 999); + if (!serd_range_empty(serd_model_range(model, huge, huge, huge, 0))) { + FAIL("Found statement past end of model\n"); + } + + serd_model_free(model); + return 0; +} + +static int +test_iter_comparison(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + + serd_world_set_error_sink(world, expected_error, NULL); + if (serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0)) { + FAIL("Failed to add statement\n"); + } + + // Add a statement with an active iterator + SerdIter* iter1 = serd_model_begin(model); + SerdIter* iter2 = serd_model_begin(model); + if (!serd_iter_equals(iter1, iter2)) { + FAIL("Begin iterators are not equal\n"); + } + + serd_iter_next(iter1); + if (serd_iter_equals(iter1, iter2)) { + FAIL("Begin iterator equals incremented begin iterator\n"); + } + + const SerdIter* end = serd_model_end(model); + if (!serd_iter_equals(iter1, end)) { + FAIL("Iterator incremented to end does not equal end iterator\n"); + } + + serd_iter_free(iter2); + serd_iter_free(iter1); + serd_model_free(model); + return 0; +} + +static int +test_triple_index_read(SerdWorld* world, const size_t n_quads) +{ + static const char* const index_names[6] = { "spo", "sop", "ops", + "osp", "pso", "pos" }; + + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = serd_model_new(world, (1 << i), false); + generate(world, model, n_quads, 0); + if (test_read(world, model, 0, n_quads)) { + FAILF("Failure reading index `%s'\n", index_names[i]); + } + serd_model_free(model); + } + + return 0; +} + +static int +test_quad_index_read(SerdWorld* world, const size_t n_quads) +{ + static const char* const index_names[6] = { "gspo", "gsop", "gops", + "gosp", "gpso", "gpos" }; + + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = serd_model_new(world, (1 << i), true); + SerdNode* graph = uri(world, 42); + generate(world, model, n_quads, graph); + if (test_read(world, model, graph, n_quads)) { + FAILF("Failure reading index `%s'\n", index_names[i]); + } + serd_model_free(model); + } + + return 0; +} + +static int +test_remove_graph(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, true); + + // Generate a couple of graphs + SerdNode* graph42 = uri(world, 42); + SerdNode* graph43 = uri(world, 43); + generate(world, model, 1, graph42); + generate(world, model, 1, graph43); + + // Remove one graph via range + SerdStatus st; + SerdRange* range = serd_model_range(model, NULL, NULL, NULL, graph43); + if ((st = serd_model_erase_range(model, range))) { + FAILF("Remove range failed (%s)\n", serd_strerror(st)); + } + serd_range_free(range); + + // Erase the first tuple (an element in the default graph) + SerdIter* iter = serd_model_begin(model); + if (serd_model_erase(model, iter)) { + FAIL("Failed to erase begin iterator on non-empty model\n"); + } + serd_iter_free(iter); + + // Ensure only the other graph is left + Quad pat = { 0, 0, 0, graph42 }; + for (iter = serd_model_begin(model); + !serd_iter_equals(iter, serd_model_end(model)); + serd_iter_next(iter)) { + if (!serd_statement_matches( + serd_iter_get(iter), pat[0], pat[1], pat[2], pat[3])) { + FAIL("Graph removal via iteration failed\n"); + } + } + serd_iter_free(iter); + + serd_model_free(model); + return 0; +} + +static int +test_default_graph(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, true); + SerdNode* s = uri(world, 1); + SerdNode* p = uri(world, 2); + SerdNode* o = uri(world, 3); + SerdNode* g1 = uri(world, 101); + SerdNode* g2 = uri(world, 102); + + // Insert the same statement into two graphs + if (serd_model_add(model, s, p, o, g1)) { + FAIL("Failed to insert statement into first graph\n"); + } + if (serd_model_add(model, s, p, o, g2)) { + FAIL("Failed to insert statement into second graph\n"); + } + + // Ensure we only see statement once in the default graph + if (serd_model_count(model, s, p, o, NULL) != 1) { + FAIL("Found duplicate triple in default graph\n"); + } + + serd_model_free(model); + return 0; +} + +static int +test_write_bad_list(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, true); + SerdNodes* nodes = serd_nodes_new(); + const SerdNode* s = serd_nodes_manage(nodes, serd_new_uri("urn:s")); + const SerdNode* p = serd_nodes_manage(nodes, serd_new_uri("urn:p")); + const SerdNode* list1 = serd_nodes_manage(nodes, serd_new_blank("l1")); + const SerdNode* list2 = serd_nodes_manage(nodes, serd_new_blank("l2")); + const SerdNode* nofirst = serd_nodes_manage(nodes, serd_new_blank("nof")); + const SerdNode* norest = serd_nodes_manage(nodes, serd_new_blank("nor")); + const SerdNode* pfirst = serd_nodes_manage(nodes, serd_new_uri(RDF_FIRST)); + const SerdNode* prest = serd_nodes_manage(nodes, serd_new_uri(RDF_REST)); + const SerdNode* val1 = serd_nodes_manage(nodes, serd_new_string("a")); + const SerdNode* val2 = serd_nodes_manage(nodes, serd_new_string("b")); + + // List where second node has no rdf:first + serd_model_add(model, s, p, list1, NULL); + serd_model_add(model, list1, pfirst, val1, NULL); + serd_model_add(model, list1, prest, nofirst, NULL); + + // List where second node has no rdf:rest + serd_model_add(model, s, p, list2, NULL); + serd_model_add(model, list2, pfirst, val1, NULL); + serd_model_add(model, list2, prest, norest, NULL); + serd_model_add(model, norest, pfirst, val2, NULL); + + SerdBuffer buffer = {NULL, 0}; + SerdEnv* env = serd_env_new(NULL); + SerdWriter* writer = serd_writer_new( + world, SERD_TURTLE, 0, env, serd_buffer_sink, &buffer); + + SerdRange* all = serd_model_all(model); + serd_range_serialise(all, serd_writer_get_sink(writer), 0); + serd_range_free(all); + + serd_writer_finish(writer); + const char* str = serd_buffer_sink_finish(&buffer); + const char* expected = "\n" + " (\n" + " \"a\"\n" + " ) , (\n" + " \"a\"\n" + " \"b\"\n" + " ) .\n"; + + if (strcmp(str, expected)) { + FAIL("Failed to serialise invalid list\n"); + } + + free(buffer.buf); + serd_writer_free(writer); + serd_model_free(model); + serd_env_free(env); + serd_nodes_free(nodes); + return 0; +} + +int +main(int argc, char** argv) +{ + static const size_t n_quads = 300; + + serd_model_free(NULL); // Shouldn't crash + + typedef int (*TestFunc)(SerdWorld*, size_t); + + const TestFunc tests[] = { test_free_null, + test_get_world, + test_all_begin, + test_add_null, + test_add_with_iterator, + test_erase_with_iterator, + test_add_erase, + test_erase_all, + test_copy, + test_equals, + test_find_past_end, + test_iter_comparison, + test_triple_index_read, + test_quad_index_read, + test_remove_graph, + test_default_graph, + test_write_bad_list, + NULL }; + + SerdWorld* world = serd_world_new(); + for (const TestFunc* t = tests; *t; ++t) { + serd_world_set_error_sink(world, unexpected_error, NULL); + if ((*t)(world, n_quads)) { + return EXIT_FAILURE; + } + } + + serd_world_free(world); + return 0; +} diff --git a/wscript b/wscript index 1ed4ad26..14706fd7 100644 --- a/wscript +++ b/wscript @@ -33,6 +33,9 @@ def options(ctx): 'largefile': 'build with large file support on 32-bit systems', 'no-posix': 'do not use POSIX functions, even if present'}) + opt.add_option('--dump', type='string', default='', dest='dump', + help='dump debugging output (iter, search, write, all)') + def configure(conf): autowaf.display_header('Serd Configuration') conf.load('compiler_c', cache=True) @@ -64,6 +67,14 @@ def configure(conf): defines = ['_POSIX_C_SOURCE=200809L'], mandatory = False) + dump = Options.options.dump.split(',') + if 'all' in dump or 'iter' in dump: + conf.define('SERD_DEBUG_ITER', 1) + if 'all' in dump or 'search' in dump: + conf.define('SERD_DEBUG_SEARCH', 1) + if 'all' in dump or 'write' in dump: + conf.define('SERD_DEBUG_WRITE', 1) + autowaf.set_lib_env(conf, 'serd', SERD_VERSION) conf.write_config_header('serd_config.h', remove=False) @@ -79,9 +90,13 @@ lib_source = ['src/base64.c', 'src/byte_source.c', 'src/cursor.c', 'src/env.c', + 'src/inserter.c', + 'src/iter.c', + 'src/model.c', 'src/n3.c', 'src/node.c', 'src/nodes.c', + 'src/range.c', 'src/reader.c', 'src/sink.c', 'src/statement.c', @@ -91,6 +106,7 @@ lib_source = ['src/base64.c', 'src/uri.c', 'src/world.c', 'src/writer.c', + 'src/zix/btree.c', 'src/zix/digest.c', 'src/zix/hash.c'] @@ -153,7 +169,8 @@ def build(bld): ('serd_test', 'tests/serd_test.c'), ('read_chunk_test', 'tests/read_chunk_test.c'), ('nodes_test', 'tests/nodes_test.c'), - ('overflow_test', 'tests/overflow_test.c')]: + ('overflow_test', 'tests/overflow_test.c'), + ('model_test', 'tests/model_test.c')]: bld(features = 'c cprogram', source = prog[1], use = 'libserd_profiled', @@ -292,9 +309,17 @@ def show_diff(from_lines, to_lines, from_filename, to_filename): tofile=os.path.abspath(to_filename)): sys.stderr.write(line) -def check_output(out_filename, check_filename, subst_from='', subst_to=''): +def check_output(out_filename, check_filename, subst_from='', subst_to='', sort=False): if not os.access(out_filename, os.F_OK): Logs.pprint('RED', 'FAIL: output %s is missing' % out_filename) + elif sort: + out_lines = sorted(set(io.open(out_filename, encoding='utf-8').readlines())) + check_lines = sorted(set(io.open(check_filename, encoding='utf-8').readlines())) + if out_lines != check_lines: + show_diff(check_lines, out_lines, check_filename, out_filename) + return False + else: + return True elif not file_equals(check_filename, out_filename, subst_from, subst_to): Logs.pprint('RED', 'FAIL: %s != %s' % (os.path.abspath(out_filename), check_filename)) @@ -412,14 +437,17 @@ def test_suite(ctx, base_uri, testdir, report, isyntax, osyntax, options=''): # Check output against test suite check_uri = model[test][mf + 'result'][0] check_path = file_uri_to_path(check_uri) + sort = '-m' in options result = autowaf.run_test( ctx, APPNAME, - lambda: check_output(action + '.out', check_path), + lambda: check_output(action + '.out', check_path, sort=sort), True, name=str(action) + ' check', quiet=True) # Run round-trip tests - test_thru(ctx, uri, action, check_path, - ' '.join(next(thru_options_iter)), isyntax, osyntax, options, quiet=True) + if not sort: + test_thru(ctx, uri, action, check_path, + ' '.join(next(thru_options_iter)), + isyntax, osyntax, options, quiet=True) # Write test report entry if report is not None: @@ -489,7 +517,8 @@ def test(ctx): ['serd_test', 'read_chunk_test', 'nodes_test', - 'overflow_test'], + 'overflow_test', + 'model_test'], name='Unit') def test_syntax_io(in_name, expected_name, lang): @@ -553,7 +582,8 @@ def test(ctx): 'serdi_static "file://%s/tests/good/manifest.ttl" > /dev/full' % srcdir, 1, name='write_error') - run_test_suites(ctx, '') + for opts in ['', '-m']: + run_test_suites(ctx, opts) autowaf.post_test(ctx, APPNAME) -- cgit v1.2.1