diff options
author | David Robillard <d@drobilla.net> | 2018-05-12 13:28:47 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2018-11-25 22:12:48 +0100 |
commit | c9fe9fdb61e15b64d03a8da062648ecd3f86d700 (patch) | |
tree | 4c2f6a751aed6a8abe03ef0b65bcbf675bf6bda8 | |
parent | 06f860fd95ba9f97e36416ad8fa3cdadba4f6259 (diff) | |
download | serd-c9fe9fdb61e15b64d03a8da062648ecd3f86d700.tar.gz serd-c9fe9fdb61e15b64d03a8da062648ecd3f86d700.tar.bz2 serd-c9fe9fdb61e15b64d03a8da062648ecd3f86d700.zip |
WIP: Add model
-rw-r--r-- | doc/serdi.1 | 7 | ||||
-rw-r--r-- | serd/serd.h | 484 | ||||
-rw-r--r-- | src/env.c | 8 | ||||
-rw-r--r-- | src/inserter.c | 106 | ||||
-rw-r--r-- | src/inserter.h | 31 | ||||
-rw-r--r-- | src/iter.c | 238 | ||||
-rw-r--r-- | src/iter.h | 88 | ||||
-rw-r--r-- | src/log.h | 60 | ||||
-rw-r--r-- | src/model.c | 651 | ||||
-rw-r--r-- | src/model.h | 53 | ||||
-rw-r--r-- | src/node.c | 35 | ||||
-rw-r--r-- | src/node.h | 1 | ||||
-rw-r--r-- | src/range.c | 248 | ||||
-rw-r--r-- | src/range.h | 36 | ||||
-rw-r--r-- | src/serdi.c | 39 | ||||
-rw-r--r-- | src/statement.c | 32 | ||||
-rw-r--r-- | src/statement.h | 12 | ||||
-rw-r--r-- | src/string.c | 1 | ||||
-rw-r--r-- | src/world.c | 1 | ||||
-rw-r--r-- | src/writer.c | 2 | ||||
-rw-r--r-- | src/zix/btree.c | 40 | ||||
-rw-r--r-- | src/zix/btree.h | 21 | ||||
-rw-r--r-- | tests/model_test.c | 837 | ||||
-rw-r--r-- | wscript | 44 |
24 files changed, 3063 insertions, 12 deletions
diff --git a/doc/serdi.1 b/doc/serdi.1 index 66d845c6..4b3c9c39 100644 --- a/doc/serdi.1 +++ b/doc/serdi.1 @@ -29,6 +29,13 @@ page of input has arrived. With this option serdi uses one page less memory, but will likely be significantly slower. .TP +\fB\-f\fR +Fast serialisation without object inlining. This affects only model +(non-streaming) serialisation, and disables searching and statement reordering +for pretty printing. Statements will be written in simple sorted order, which +is faster, but may result in less readable output in Turtle or TriG. + +.TP \fB\-h\fR Print the command line options. diff --git a/serd/serd.h b/serd/serd.h index 160db907..29bff936 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -83,6 +83,33 @@ typedef struct SerdStatementImpl SerdStatement; typedef struct SerdCursorImpl SerdCursor; /** + Model. + + A model is an indexed set of statements. It may be searched using various + patterns depending on which indices are enabled. +*/ +typedef struct SerdModelImpl SerdModel; + +/** + Model Inserter. + + An inserter is used for writing statements to a model using the Serd sink + interface. This makes it simple to write to a model directly using a + SerdReader, or any other code that writes statements to a SerdStatementSink. +*/ +typedef struct SerdInserterImpl SerdInserter; + +/** + Model Iterator. +*/ +typedef struct SerdIterImpl SerdIter; + +/** + Model Range. +*/ +typedef struct SerdRangeImpl SerdRange; + +/** Environment. Represents the state required to resolve a CURIE or relative URI, e.g. the @@ -127,6 +154,7 @@ typedef enum { SERD_ERR_UNKNOWN, /**< Unknown error */ SERD_ERR_BAD_SYNTAX, /**< Invalid syntax */ SERD_ERR_BAD_ARG, /**< Invalid argument */ + SERD_ERR_BAD_ITER, /**< Use of invalidated iterator */ SERD_ERR_NOT_FOUND, /**< Not found */ SERD_ERR_ID_CLASH, /**< Encountered clashing blank node IDs */ SERD_ERR_BAD_CURIE, /**< Invalid CURIE (e.g. prefix does not exist) */ @@ -180,6 +208,18 @@ typedef enum { typedef uint32_t SerdStatementFlags; /** + Flags that control style for a model serialisation. +*/ +typedef enum { + SERD_NO_INLINE_OBJECTS = 1 << 0, /**< Do not inline objects where possible */ +} SerdSerialisationFlag; + +/** + Bitwise OR of SerdStatementFlag values. +*/ +typedef uint32_t SerdSerialisationFlags; + +/** Type of a syntactic RDF node. This is more precise than the type of an abstract RDF node. An abstract @@ -254,6 +294,35 @@ typedef enum { } SerdField; /** + Flags for model features. +*/ +typedef enum { + SERD_MODEL_GRAPHS = 1 /**< Support multiple graphs in model. */ +} SerdModelFlag; + +/** + Bitwise OR of SerdModelFlag values. +*/ +typedef uint32_t SerdModelFlags; + +/** + Indexing option. +*/ +typedef enum { + SERD_SPO = 1, /**< Subject, Predicate, Object */ + SERD_SOP = 1 << 1, /**< Subject, Object, Predicate */ + SERD_OPS = 1 << 2, /**< Object, Predicate, Subject */ + SERD_OSP = 1 << 3, /**< Object, Subject, Predicate */ + SERD_PSO = 1 << 4, /**< Predicate, Subject, Object */ + SERD_POS = 1 << 5 /**< Predicate, Object, Subject */ +} SerdIndexOption; + +/** + Bitwise OR of SerdIndexOption values. +*/ +typedef uint32_t SerdIndexOptions; + +/** Bitwise OR of SerdNodeFlag values. */ typedef uint32_t SerdNodeFlags; @@ -634,6 +703,16 @@ bool serd_node_equals(const SerdNode* a, const SerdNode* b); /** + Compare two nodes. + + Returns less than, equal to, or greater than zero if `a` is less than, equal + to, or greater than `b`, respectively. +*/ +SERD_API +int +serd_node_compare(const SerdNode* a, const SerdNode* b); + +/** Create a new URI from a string. */ SERD_API @@ -984,6 +1063,13 @@ serd_env_foreach(const SerdEnv* env, void* handle); /** + Send all prefixes in `env` to `sink`. +*/ +SERD_API +void +serd_env_send_prefixes(const SerdEnv* env, SerdSink* sink); + +/** @} @name Sink @{ @@ -1233,7 +1319,7 @@ serd_writer_free(SerdWriter* writer); Return a sink interface that emits statements via `writer`. */ SERD_API -const SerdSink* +SerdSink* serd_writer_get_sink(SerdWriter* writer); /** @@ -1364,6 +1450,251 @@ serd_nodes_deref(SerdNodes* nodes, const SerdNode* node); /** @} + @name Model + @{ +*/ + +/** + Create a new model. + + @param world The world in which to make this model. + + @param indices Enabled indices, for example `SERD_SPO | SERD_OPS`. Be sure + to enable an index where the most significant node(s) are not variables in + your queries. For example, to make (? P O) queries, enable either SERD_OPS + or SERD_POS). + + @param flags Model options. +*/ +SERD_API +SerdModel* +serd_model_new(SerdWorld* world, SerdIndexOptions indices, SerdModelFlags flags); + +/** + Return a deep copy of `model`. +*/ +SERD_API +SerdModel* +serd_model_copy(const SerdModel* model); + +/** + Return true iff `a` is equal to `b`, ignoring statement cursor metadata. +*/ +SERD_API +bool +serd_model_equals(const SerdModel* a, const SerdModel* b); + +/** + Close and free `model`. +*/ +SERD_API +void +serd_model_free(SerdModel* model); + +/** + Get the world associated with `model`. +*/ +SERD_API +SerdWorld* +serd_model_get_world(SerdModel* model); + +/** + Return the number of statements stored in `model`. +*/ +SERD_API +size_t +serd_model_size(const SerdModel* model); + +/** + Return true iff there are no statements stored in `model`. +*/ +SERD_API +bool +serd_model_empty(const SerdModel* model); + +/** + Return an iterator to the start of `model`. +*/ +SERD_API +SerdIter* +serd_model_begin(const SerdModel* model); + +/** + Return an iterator to the end of `model`. +*/ +SERD_API +const SerdIter* +serd_model_end(const SerdModel* model); + +/** + Return a range of all statements in `model`. +*/ +SERD_API +SerdRange* +serd_model_all(const SerdModel* model); + +/** + Search for statements by a quad pattern. + @return An iterator to the first match, or NULL if no matches found. +*/ +SERD_API +SerdIter* +serd_model_find(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Search for statements by a quad pattern. + @return A range containins all matching statements. +*/ +SERD_API +SerdRange* +serd_model_range(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Search for a single node that matches a pattern. + Exactly one of `s`, `p`, `o` must be NULL. + This function is mainly useful for predicates that only have one value. + @return The first matching node, or NULL if no matches are found. +*/ +SERD_API +const SerdNode* +serd_model_get(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Search for a single statement that matches a pattern. + + This function is mainly useful for predicates that only have one value. + + @return The first matching statement, or NULL if none are found. +*/ +SERD_API +const SerdStatement* +serd_model_get_statement(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Return true iff a statement exists. +*/ +SERD_API +bool +serd_model_ask(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Return the number of matching statements. +*/ +SERD_API +size_t +serd_model_count(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Add a statement to a model from nodes. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_add(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Add a statement to a model. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_insert(SerdModel* model, const SerdStatement* statement); + +/** + Add a range of statements to a model. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_add_range(SerdModel* model, SerdRange* range); + +/** + Remove a quad from a model via an iterator. + + Calling this function invalidates all iterators on `model` except `iter`. + + @param model The model which `iter` points to. + @param iter Iterator to the element to erase, which is incremented to the + next value on return. +*/ +SERD_API +SerdStatus +serd_model_erase(SerdModel* model, SerdIter* iter); + +/** + Remove a range from a model. + + Calling this function invalidates all iterators on `model` except `iter`. + + @param model The model which `range` points to. + @param range Range to erase, which will be empty on return; +*/ +SERD_API +SerdStatus +serd_model_erase_range(SerdModel* model, SerdRange* range); + +/** + @} + @name Inserter + @{ +*/ + +/** + Create an inserter for writing statements to a model. +*/ +SERD_API +SerdInserter* +serd_inserter_new(SerdModel* model, + SerdEnv* env, + const SerdNode* default_graph); + +/** + Free an inserter. +*/ +SERD_API +void +serd_inserter_free(SerdInserter* inserter); + +/** + Return a sink interface that adds statements via `inserter`. +*/ +SERD_API +const SerdSink* +serd_inserter_get_sink(SerdInserter* inserter); + +/** + @} @name Statement @{ */ @@ -1411,6 +1742,157 @@ const SerdCursor* serd_statement_get_cursor(const SerdStatement* statement); /** + Return true iff `a` is equal to `b`, ignoring statement cursor metadata. + + Only returns true if nodes are equivalent, does not perform wildcard matching. +*/ +SERD_API +bool +serd_statement_equals(const SerdStatement* a, const SerdStatement* b); + +/** + Return true iff `statement` matches the given pattern. + + The matching rules are the same used for querying: nodes match if they are + equivalent, and NULL acts as a wildcard that matches any node. +*/ +SERD_API +bool +serd_statement_matches(const SerdStatement* statement, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph); + +/** + @} + @name Iterator + @{ +*/ + +/** + Return a new copy of `iter`. +*/ +SERD_API +SerdIter* +serd_iter_copy(const SerdIter* iter); + +/** + Return the statement pointed to by `iter`. +*/ +SERD_API +const SerdStatement* +serd_iter_get(const SerdIter* iter); + +/** + Increment `iter` to point to the next statement. + + @return True iff `iter` has reached the end. +*/ +SERD_API +bool +serd_iter_next(SerdIter* iter); + +/** + Return true iff `lhs` is equal to `rhs`. +*/ +SERD_API +bool +serd_iter_equals(const SerdIter* lhs, const SerdIter* rhs); + +/** + Free `iter`. +*/ +SERD_API +void +serd_iter_free(SerdIter* iter); + +/** + @} + @name Range + @{ +*/ + +/** + Return a new copy of `range`. +*/ +SERD_API +SerdRange* +serd_range_copy(const SerdRange* range); + +/** + Free `range`. +*/ +SERD_API +void +serd_range_free(SerdRange* range); + +/** + Return the first statement in `range`, or NULL if `range` is empty. +*/ +SERD_API +const SerdStatement* +serd_range_front(const SerdRange* range); + +/** + Return true iff `lhs` is equal to `rhs`. +*/ +SERD_API +bool +serd_range_equals(const SerdRange* lhs, const SerdRange* rhs); + +/** + Increment the start of `range` to point to the next statement. +*/ +SERD_API +bool +serd_range_next(SerdRange* range); + +/** + Return the number of statements in `range`. +*/ +SERD_API +size_t +serd_range_size(const SerdRange* range); + +/** + Return true iff there are no statements in `range`. +*/ +SERD_API +bool +serd_range_empty(const SerdRange* range); + +/** + Return an iterator to the start of `range`. +*/ +SERD_API +const SerdIter* +serd_range_begin(const SerdRange* range); + +/** + Return an iterator to the end of `range`. +*/ +SERD_API +const SerdIter* +serd_range_end(const SerdRange* range); + +/** + Write `range` to `sink`. + + The serialisation style can be controlled with `flags`. The default is to + write statements in an order suited for pretty-printing with Turtle or TriG + with as many objects written inline as possible. If + `SERD_NO_INLINE_OBJECTS` is given, a simple sorted stream is written + instead, which is significantly faster since no searching is required, but + can result in ugly output for Turtle or Trig. +*/ +SERD_API +SerdStatus +serd_range_serialise(const SerdRange* range, + const SerdSink* sink, + SerdSerialisationFlags flags); + +/** @} @name Cursor @{ @@ -326,3 +326,11 @@ serd_env_foreach(const SerdEnv* env, func(handle, env->prefixes[i].name, env->prefixes[i].uri); } } + +void +serd_env_send_prefixes(const SerdEnv* env, SerdSink* sink) +{ + for (size_t i = 0; i < env->n_prefixes; ++i) { + serd_sink_set_prefix(sink, env->prefixes[i].name, env->prefixes[i].uri); + } +} diff --git a/src/inserter.c b/src/inserter.c new file mode 100644 index 00000000..393e2c20 --- /dev/null +++ b/src/inserter.c @@ -0,0 +1,106 @@ +/* + Copyright 2011-2018 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "inserter.h" + +#include "model.h" +#include "statement.h" +#include "world.h" + +#include <stdlib.h> + +static SerdStatus +serd_inserter_on_base(SerdInserter* inserter, const SerdNode* uri) +{ + return serd_env_set_base_uri(inserter->env, uri); +} + +static SerdStatus +serd_inserter_on_prefix(SerdInserter* inserter, + const SerdNode* name, + const SerdNode* uri) +{ + return serd_env_set_prefix(inserter->env, name, uri); +} + +static const SerdNode* +manage_or_intern(SerdNodes* nodes, SerdNode* manage, const SerdNode* intern) +{ + return manage ? serd_nodes_manage(nodes, manage) + : serd_nodes_intern(nodes, intern); +} + +static SerdStatus +serd_inserter_on_statement(SerdInserter* inserter, + const SerdStatementFlags flags, + const SerdStatement* statement) +{ + const SerdNode* const subject = serd_statement_get_subject(statement); + const SerdNode* const predicate = serd_statement_get_predicate(statement); + const SerdNode* const object = serd_statement_get_object(statement); + const SerdNode* const graph = serd_statement_get_graph(statement); + + // Attempt to expand all nodes to eliminate CURIEs + SerdNode* const s = serd_env_expand(inserter->env, subject); + SerdNode* const p = serd_env_expand(inserter->env, predicate); + SerdNode* const o = serd_env_expand(inserter->env, object); + SerdNode* const g = serd_env_expand(inserter->env, graph); + + SerdNodes* const nodes = inserter->model->world->nodes; + const SerdNode* model_graph = manage_or_intern(nodes, g, graph); + if (!model_graph) { + model_graph = serd_nodes_intern(nodes, inserter->default_graph); + } + + const SerdStatus st = + serd_model_add_internal(inserter->model, + statement->cursor, + manage_or_intern(nodes, s, subject), + manage_or_intern(nodes, p, predicate), + manage_or_intern(nodes, o, object), + model_graph); + + return st > SERD_FAILURE ? st : SERD_SUCCESS; +} + +SerdInserter* +serd_inserter_new(SerdModel* model, SerdEnv* env, const SerdNode* default_graph) +{ + SerdInserter* inserter = (SerdInserter*)calloc(1, sizeof(SerdInserter)); + inserter->model = model; + inserter->env = env; + inserter->default_graph = serd_node_copy(default_graph); + inserter->iface.handle = inserter; + inserter->iface.base = (SerdBaseSink)serd_inserter_on_base; + inserter->iface.prefix = (SerdPrefixSink)serd_inserter_on_prefix; + inserter->iface.statement = (SerdStatementSink)serd_inserter_on_statement; + return inserter; +} + +void +serd_inserter_free(SerdInserter* inserter) +{ + if (inserter) { + serd_node_free(inserter->default_graph); + free(inserter); + } +} + +const SerdSink* +serd_inserter_get_sink(SerdInserter* inserter) +{ + return &inserter->iface; +} diff --git a/src/inserter.h b/src/inserter.h new file mode 100644 index 00000000..2f3e4239 --- /dev/null +++ b/src/inserter.h @@ -0,0 +1,31 @@ +/* + Copyright 2011-2018 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_INSERTER_H +#define SERD_INSERTER_H + +#include "sink.h" + +#include "serd/serd.h" + +struct SerdInserterImpl { + SerdModel* model; + SerdEnv* env; + SerdNode* default_graph; + SerdSink iface; +}; + +#endif // SERD_INSERTER_H diff --git a/src/iter.c b/src/iter.c new file mode 100644 index 00000000..71df42c1 --- /dev/null +++ b/src/iter.c @@ -0,0 +1,238 @@ +/* + Copyright 2011-2018 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "iter.h" + +#include "log.h" +#include "model.h" +#include "statement.h" +#include "world.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +static bool +serd_iter_pattern_matches(const SerdIter* iter, const SerdQuad pat) +{ + const SerdStatement* key = (const SerdStatement*)zix_btree_get(iter->cur); + for (int i = 0; i < iter->n_prefix; ++i) { + const int idx = orderings[iter->order][i]; + if (!serd_node_pattern_match(key->nodes[idx], pat[idx])) { + return false; + } + } + + return true; +} + +/** + Seek forward as necessary until `iter` points at a matching statement. + + @return true iff iterator reached end of valid range. +*/ +static bool +serd_iter_seek_match(SerdIter* iter, const SerdQuad pat) +{ + for (; !zix_btree_iter_is_end(iter->cur); + zix_btree_iter_increment(iter->cur)) { + const SerdStatement* s = (const SerdStatement*)zix_btree_get(iter->cur); + if (serd_statement_matches_quad(s, pat)) { + return false; // Found match + } else if (iter->mode == FILTER_RANGE && + !serd_iter_pattern_matches(iter, pat)) { + zix_btree_iter_free(iter->cur); + iter->cur = NULL; + return true; // Reached end of range + } + } + + assert(zix_btree_iter_is_end(iter->cur)); + return true; // Reached end of index +} + +static bool +check_version(const SerdIter* const iter) +{ + if (iter->version != iter->model->version) { + serd_world_errorf(iter->model->world, + SERD_ERR_BAD_ITER, + "attempt to use invalidated iterator\n"); + return false; + } + return true; +} + +SerdIter* +serd_iter_new(const SerdModel* model, + ZixBTreeIter* cur, + const SerdQuad pat, + SerdOrder order, + SearchMode mode, + int n_prefix) +{ + SerdIter* iter = (SerdIter*)calloc(1, sizeof(SerdIter)); + iter->model = model; + iter->cur = cur; + iter->order = order; + iter->mode = mode; + iter->n_prefix = n_prefix; + iter->version = model->version; + + switch (iter->mode) { + case ALL: + case RANGE: + assert(zix_btree_iter_is_end(iter->cur) || + serd_statement_matches_quad( + ((const SerdStatement*)zix_btree_get(iter->cur)), pat)); + break; + case FILTER_RANGE: + case FILTER_ALL: + serd_iter_seek_match(iter, pat); + break; + } + + // Replace (possibly temporary) nodes in pattern with nodes from the model + if (!zix_btree_iter_is_end(iter->cur)) { + const SerdStatement* s = (const SerdStatement*)zix_btree_get(iter->cur); + for (int i = 0; i < TUP_LEN; ++i) { + if (pat[i]) { + iter->pat[i] = s->nodes[i]; + } + } + } + +#ifdef SERD_DEBUG_ITER + if (!zix_btree_iter_is_end(iter->cur)) { + const SerdStatement* statement = serd_iter_get(iter); + SERD_ITER_LOG("New %p pat=" TUP_FMT " realpat=" TUP_FMT " order=%d prefix=%d mode=%d cur=" TUP_FMT "\n", + (void*)iter, + TUP_FMT_ARGS(pat), + TUP_FMT_ARGS(iter->pat), + iter->order, + iter->n_prefix, + iter->mode, + TUP_FMT_ARGS(statement->nodes)); + } else { + SERD_ITER_LOG("New %p pat=" TUP_FMT " mode=%d at end\n", + (void*)iter, + TUP_FMT_ARGS(pat), + iter->mode); + } +#endif + + return iter; +} + +SerdIter* +serd_iter_copy(const SerdIter* iter) +{ + if (!iter) { + return NULL; + } + + SerdIter* copy = (SerdIter*)malloc(sizeof(SerdIter)); + memcpy(copy, iter, sizeof(SerdIter)); + copy->cur = zix_btree_iter_copy(iter->cur); + return copy; +} + +const SerdStatement* +serd_iter_get(const SerdIter* iter) +{ + return check_version(iter) ? (const SerdStatement*)zix_btree_get(iter->cur) + : NULL; +} + +bool +serd_iter_scan_next(SerdIter* iter) +{ + if (zix_btree_iter_is_end(iter->cur)) { + return true; + } + + switch (iter->mode) { + case ALL: + // At the end if the cursor is (assigned above) + break; + case RANGE: + SERD_ITER_LOG("%p range next\n", (void*)iter); + // At the end if the MSNs no longer match + if (!serd_iter_pattern_matches(iter, iter->pat)) { + zix_btree_iter_free(iter->cur); + iter->cur = NULL; + SERD_ITER_LOG("%p reached non-match end\n", (void*)iter); + } + break; + case FILTER_RANGE: + case FILTER_ALL: + // Seek forward to next match + serd_iter_seek_match(iter, iter->pat); + break; + } + + if (zix_btree_iter_is_end(iter->cur)) { + SERD_ITER_LOG("%p reached end\n", (void*)iter); + return true; + } else { +#ifdef SERD_DEBUG_ITER + SERD_ITER_LOG("%p reached " TUP_FMT "\n", + (void*)iter, + TUP_FMT_ARGS(serd_iter_get(iter)->nodes)); +#endif + return false; + } +} + +bool +serd_iter_next(SerdIter* iter) +{ + if (zix_btree_iter_is_end(iter->cur) || !check_version(iter)) { + return true; + } + + zix_btree_iter_increment(iter->cur); + return serd_iter_scan_next(iter); +} + +bool +serd_iter_equals(const SerdIter* lhs, const SerdIter* rhs) +{ + if (!lhs && !rhs) { + return true; + } + + return (lhs && rhs && lhs->model == rhs->model && + zix_btree_iter_equals(lhs->cur, rhs->cur) && + serd_node_pattern_match(lhs->pat[0], rhs->pat[0]) && + serd_node_pattern_match(lhs->pat[1], rhs->pat[1]) && + serd_node_pattern_match(lhs->pat[2], rhs->pat[2]) && + serd_node_pattern_match(lhs->pat[3], rhs->pat[3]) && + lhs->order == rhs->order && lhs->mode == rhs->mode && + lhs->n_prefix == rhs->n_prefix); +} + +void +serd_iter_free(SerdIter* iter) +{ + if (iter) { + zix_btree_iter_free(iter->cur); + free(iter); + } +} diff --git a/src/iter.h b/src/iter.h new file mode 100644 index 00000000..ac3e4b16 --- /dev/null +++ b/src/iter.h @@ -0,0 +1,88 @@ +/* + Copyright 2011-2018 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_ITER_H +#define SERD_ITER_H + +#include "statement.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include <stdbool.h> + +/** Triple ordering */ +typedef enum { + SPO, ///< Subject, Predicate, Object + SOP, ///< Subject, Object, Predicate + OPS, ///< Object, Predicate, Subject + OSP, ///< Object, Subject, Predicate + PSO, ///< Predicate, Subject, Object + POS, ///< Predicate, Object, Subject + GSPO, ///< Graph, Subject, Predicate, Object + GSOP, ///< Graph, Subject, Object, Predicate + GOPS, ///< Graph, Object, Predicate, Subject + GOSP, ///< Graph, Object, Subject, Predicate + GPSO, ///< Graph, Predicate, Subject, Object + GPOS ///< Graph, Predicate, Object, Subject +} SerdOrder; + +/** Mode for searching or iteration */ +typedef enum { + ALL, ///< Iterate over entire store + RANGE, ///< Iterate over range with equal prefix + FILTER_RANGE, ///< Iterate over range with equal prefix, filtering + FILTER_ALL ///< Iterate to end of store, filtering +} SearchMode; + +struct SerdIterImpl { + const SerdModel* model; ///< Model being iterated over + ZixBTreeIter* cur; ///< Current DB cursor + SerdQuad pat; ///< Pattern (in ordering order) + SerdOrder order; ///< Store order (which index) + SearchMode mode; ///< Iteration mode + int n_prefix; ///< Prefix for RANGE and FILTER_RANGE + uint64_t version; ///< Model version when iterator was created +}; + +#define NUM_ORDERS 12 +#define TUP_LEN 4 + +/** + Quads of indices for each order, from most to least significant + (array indexed by SordOrder) +*/ +static const int orderings[NUM_ORDERS][TUP_LEN] = { + { 0, 1, 2, 3 }, { 0, 2, 1, 3 }, // SPO, SOP + { 2, 1, 0, 3 }, { 2, 0, 1, 3 }, // OPS, OSP + { 1, 0, 2, 3 }, { 1, 2, 0, 3 }, // PSO, POS + { 3, 0, 1, 2 }, { 3, 0, 2, 1 }, // GSPO, GSOP + { 3, 2, 1, 0 }, { 3, 2, 0, 1 }, // GOPS, GOSP + { 3, 1, 0, 2 }, { 3, 1, 2, 0 } // GPSO, GPOS +}; + +SerdIter* serd_iter_new(const SerdModel* model, + ZixBTreeIter* cur, + const SerdQuad pat, + SerdOrder order, + SearchMode mode, + int n_prefix); + +bool serd_iter_scan_next(SerdIter* iter); + +bool serd_quad_match(const SerdQuad x, const SerdQuad y); + +#endif // SERD_ITER_H diff --git a/src/log.h b/src/log.h new file mode 100644 index 00000000..c60cb326 --- /dev/null +++ b/src/log.h @@ -0,0 +1,60 @@ +/* + Copyright 2011-2018 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_LOG_H +#define SERD_LOG_H + +#ifdef SERD_DEBUG_SEARCH +/** String name of each ordering (array indexed by SordOrder) */ +static const char* const order_names[] = { + "spo", "sop", "ops", "osp", "pso", "pos", + "gspo", "gsop", "gops", "gosp", "gpso", "gpos" +}; +#endif + +#define TUP_FMT "(%s %s %s %s)" +#define TUP_FMT_ELEM(e) ((e) ? serd_node_get_string(e) : "*") +#define TUP_FMT_ARGS(t) \ + TUP_FMT_ELEM((t)[0]), \ + TUP_FMT_ELEM((t)[1]), \ + TUP_FMT_ELEM((t)[2]), \ + TUP_FMT_ELEM((t)[3]) + +#define STATEMENT_FMT_ARGS(s) \ + TUP_FMT_ELEM(serd_statement_get_subject(s)), \ + TUP_FMT_ELEM(serd_statement_get_predicate(s)), \ + TUP_FMT_ELEM(serd_statement_get_object(s)), \ + TUP_FMT_ELEM(serd_statement_get_graph(s)) + +#define SERD_LOG(prefix, ...) fprintf(stderr, "serd." prefix ": " __VA_ARGS__) + +#ifdef SERD_DEBUG_ITER +# define SERD_ITER_LOG(...) SERD_LOG("iter", __VA_ARGS__) +#else +# define SERD_ITER_LOG(...) +#endif +#ifdef SERD_DEBUG_SEARCH +# define SERD_FIND_LOG(...) SERD_LOG("search", __VA_ARGS__) +#else +# define SERD_FIND_LOG(...) +#endif +#ifdef SERD_DEBUG_WRITE +# define SERD_WRITE_LOG(...) SERD_LOG("write", __VA_ARGS__) +#else +# define SERD_WRITE_LOG(...) +#endif + +#endif // SERD_LOG_H diff --git a/src/model.c b/src/model.c new file mode 100644 index 00000000..3bc55c93 --- /dev/null +++ b/src/model.c @@ -0,0 +1,651 @@ +/* + Copyright 2011-2018 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "model.h" + +#include "cursor.h" +#include "iter.h" +#include "log.h" +#include "node.h" +#include "range.h" +#include "statement.h" +#include "world.h" + +#include "zix/btree.h" + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> + +#define DEFAULT_ORDER SPO +#define DEFAULT_GRAPH_ORDER GSPO + +/** + Compare quads lexicographically, ignoring graph. + + NULL IDs (equal to 0) are treated as wildcards, always less than every + other possible ID, except itself. +*/ +static int +serd_triple_compare(const void* x, const void* y, void* user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (int i = 0; i < TUP_LEN; ++i) { + const int o = ordering[i]; + if (o != SERD_GRAPH) { + const int cmp = + serd_node_wildcard_compare(s->nodes[o], t->nodes[o]); + if (cmp) { + return cmp; + } + } + } + + return 0; +} + +/** + Compare quads lexicographically, with exact (non-wildcard) graph matching. +*/ +static int +serd_quad_compare(const void* x, const void* y, void* user_data) +{ + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + // Compare graph without wildcard matching + const int cmp = serd_node_compare( + s->nodes[SERD_GRAPH], t->nodes[SERD_GRAPH]); + + return cmp ? cmp : serd_triple_compare(x, y, user_data); +} + +/** + Return true iff `serd` has an index for `order`. + If `graphs` is true, `order` will be modified to be the + corresponding order with a G prepended (so G will be the MSN). +*/ +static inline bool +serd_model_has_index(const SerdModel* model, + SerdOrder* order, + int* n_prefix, + bool graphs) +{ + if (graphs) { + *order = (SerdOrder)(*order + GSPO); + *n_prefix += 1; + } + + return model->indices[*order]; +} + +/** + Return the best available index for a pattern. + @param pat Pattern in standard (S P O G) order + @param mode Set to the (best) iteration mode for iterating over results + @param n_prefix Set to the length of the range prefix + (for `mode` == RANGE and `mode` == FILTER_RANGE) +*/ +static SerdOrder +serd_model_best_index(const SerdModel* model, + const SerdQuad pat, + SearchMode* mode, + int* n_prefix) +{ + const bool graph_search = (pat[SERD_GRAPH] != 0); + + const unsigned sig = ((pat[0] ? 1 : 0) * 0x100 + + (pat[1] ? 1 : 0) * 0x010 + + (pat[2] ? 1 : 0) * 0x001); + + SerdOrder good[2] = { (SerdOrder)-1, (SerdOrder)-1 }; + +#define PAT_CASE(sig, m, g0, g1, np) \ + case sig: \ + *mode = m; \ + good[0] = g0; \ + good[1] = g1; \ + *n_prefix = np; \ + break + + // Good orderings that don't require filtering + *mode = RANGE; + *n_prefix = 0; + switch (sig) { + case 0x000: + assert(graph_search); + *mode = RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + case 0x111: + *mode = RANGE; + *n_prefix = graph_search ? 4 : 3; + return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER; + + PAT_CASE(0x001, RANGE, OPS, OSP, 1); + PAT_CASE(0x010, RANGE, POS, PSO, 1); + PAT_CASE(0x011, RANGE, OPS, POS, 2); + PAT_CASE(0x100, RANGE, SPO, SOP, 1); + PAT_CASE(0x101, RANGE, SOP, OSP, 2); + PAT_CASE(0x110, RANGE, SPO, PSO, 2); + } + + if (*mode == RANGE) { + if (serd_model_has_index(model, &good[0], n_prefix, graph_search)) { + return good[0]; + } else if (serd_model_has_index( + model, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + // Not so good orderings that require filtering, but can + // still be constrained to a range + switch (sig) { + PAT_CASE(0x011, FILTER_RANGE, OSP, PSO, 1); + PAT_CASE(0x101, FILTER_RANGE, SPO, OPS, 1); + // SPO is always present, so 0x110 is never reached here + default: break; + } + + if (*mode == FILTER_RANGE) { + if (serd_model_has_index(model, &good[0], n_prefix, graph_search)) { + return good[0]; + } else if (serd_model_has_index( + model, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + if (graph_search) { + *mode = FILTER_RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + } else { + *mode = FILTER_ALL; + return DEFAULT_ORDER; + } +} + +SerdModel* +serd_model_new(SerdWorld* world, SerdIndexOptions indices, SerdModelFlags flags) +{ + SerdModel* model = (SerdModel*)calloc(1, sizeof(struct SerdModelImpl)); + + indices |= SERD_SPO; // SPO index is mandatory + + model->world = world; + model->index_options = indices; + model->flags = flags; + + for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) { + const int* const ordering = orderings[i]; + const int* const g_ordering = orderings[i + (NUM_ORDERS / 2)]; + + if (indices & (1 << i)) { + model->indices[i] = + zix_btree_new((ZixComparator)serd_triple_compare, + (const void*)ordering, + NULL); + if (flags & SERD_MODEL_GRAPHS) { + model->indices[i + (NUM_ORDERS / 2)] = + zix_btree_new((ZixComparator)serd_quad_compare, + (const void*)g_ordering, + NULL); + } + } + } + + // Create end iterator + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + ZixBTreeIter* cur = zix_btree_end(model->indices[order]); + const SerdQuad pat = { 0, 0, 0, 0 }; + + model->end = serd_iter_new(model, cur, pat, order, ALL, 0); + + return model; +} + +SerdModel* +serd_model_copy(const SerdModel* model) +{ + SerdModel* copy = + serd_model_new(model->world, model->index_options, model->flags); + + SerdRange* all = serd_model_all(model); + serd_model_add_range(copy, all); + serd_range_free(all); + + assert(serd_model_size(model) == serd_model_size(copy)); + return copy; +} + +SERD_API +bool +serd_model_equals(const SerdModel* a, const SerdModel* b) +{ + if (!a && !b) { + return true; + } else if (!a || !b || serd_model_size(a) != serd_model_size(b)) { + return false; + } + + SerdRange* ra = serd_model_all(a); + SerdRange* rb = serd_model_all(b); + + bool result = true; + while (!serd_range_empty(ra) && !serd_range_empty(rb)) { + if (!serd_statement_equals(serd_range_front(ra), + serd_range_front(rb))) { + result = false; + break; + } + + serd_range_next(ra); + serd_range_next(rb); + } + + result = result && serd_range_empty(ra) && serd_range_empty(rb); + + serd_range_free(ra); + serd_range_free(rb); + return result; +} + +static void +serd_model_drop_statement(SerdModel* model, SerdStatement* statement) + +{ + for (int i = 0; i < TUP_LEN; ++i) { + if (statement->nodes[i]) { + serd_nodes_deref(model->world->nodes, statement->nodes[i]); + } + } + + if (statement->cursor && statement->cursor->file) { + serd_nodes_deref(model->world->nodes, statement->cursor->file); + } + + free(statement->cursor); + free(statement); +} + +void +serd_model_free(SerdModel* model) +{ + if (!model) { + return; + } + + serd_iter_free(model->end); + + ZixBTree* index = model->indices[model->indices[DEFAULT_GRAPH_ORDER] + ? DEFAULT_GRAPH_ORDER + : DEFAULT_ORDER]; + + // Free statements + ZixBTreeIter* t = zix_btree_begin(index); + for (; !zix_btree_iter_is_end(t); zix_btree_iter_increment(t)) { + SerdStatement* s = (SerdStatement*)zix_btree_get(t); + free(s->cursor); + free(s); + } + zix_btree_iter_free(t); + + // Free indices + for (unsigned o = 0; o < NUM_ORDERS; ++o) { + if (model->indices[o]) { + zix_btree_free(model->indices[o]); + } + } + + free(model); +} + +SerdWorld* +serd_model_get_world(SerdModel* model) +{ + return model->world; +} + +size_t +serd_model_size(const SerdModel* model) +{ + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + return zix_btree_size(model->indices[order]); +} + +bool +serd_model_empty(const SerdModel* model) +{ + return serd_model_size(model) == 0; +} + +SerdIter* +serd_model_begin(const SerdModel* model) +{ + if (serd_model_size(model) == 0) { + return serd_iter_copy(serd_model_end(model)); + } else { + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + ZixBTreeIter* cur = zix_btree_begin(model->indices[order]); + const SerdQuad pat = { 0, 0, 0, 0 }; + return serd_iter_new(model, cur, pat, order, ALL, 0); + } +} + +const SerdIter* +serd_model_end(const SerdModel* model) +{ + return model->end; +} + +SerdRange* +serd_model_all(const SerdModel* model) +{ + return serd_range_new(serd_model_begin(model), + serd_iter_copy(serd_model_end(model))); +} + +SerdIter* +serd_model_find(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + const SerdQuad pat = { s, p, o, g }; + if (!pat[0] && !pat[1] && !pat[2] && !pat[3]) { + return serd_model_begin(model); + } + + SearchMode mode; + int n_prefix; + const SerdOrder index_order = + serd_model_best_index(model, pat, &mode, &n_prefix); + + SERD_FIND_LOG("Find " TUP_FMT " index=%s mode=%d n_prefix=%d\n", + TUP_FMT_ARGS(pat), + order_names[index_order], + mode, + n_prefix); + + ZixBTree* const db = model->indices[index_order]; + ZixBTreeIter* cur = NULL; + + if (mode == FILTER_ALL) { + // No prefix shared with an index at all, linear search (worst case) + SERD_FIND_LOG("warning: Linear model search\n"); + cur = zix_btree_begin(db); + } else if (mode == FILTER_RANGE) { + /* Some prefix, but filtering still required. Build a search pattern + with only the prefix to find the lower bound in log time. */ + SERD_FIND_LOG("warning: Linear range search\n"); + SerdQuad prefix_pat = { NULL, NULL, NULL, NULL }; + const int* const ordering = orderings[index_order]; + for (int i = 0; i < n_prefix; ++i) { + prefix_pat[ordering[i]] = pat[ordering[i]]; + } + zix_btree_lower_bound(db, prefix_pat, &cur); + } else { + // Ideal case, pattern matches an index with no filtering required + zix_btree_lower_bound(db, pat, &cur); + } + + if (zix_btree_iter_is_end(cur)) { + SERD_FIND_LOG("No match found, iterator at end\n"); + zix_btree_iter_free(cur); + return NULL; + } + + const SerdStatement* const key = (const SerdStatement*)zix_btree_get(cur); + if (!key || (mode == RANGE && !serd_statement_matches_quad(key, pat))) { + SERD_FIND_LOG("No match found, cursor at " TUP_FMT "\n", + TUP_FMT_ARGS(key->nodes)); + + zix_btree_iter_free(cur); + return NULL; + } + + return serd_iter_new(model, cur, pat, index_order, mode, n_prefix); +} + +SerdRange* +serd_model_range(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if (!s && !p && !o && !g) { + return serd_range_new(serd_model_begin(model), + serd_iter_copy(serd_model_end(model))); + } + + SerdIter* begin = serd_model_find(model, s, p, o, g); + SerdIter* end = begin ? serd_iter_new(model, + NULL, + begin->pat, + begin->order, + begin->mode, + begin->n_prefix) + : NULL; + return serd_range_new(begin, end); +} + +const SerdNode* +serd_model_get(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + const SerdStatement* statement = + serd_model_get_statement(model, s, p, o, g); + + if (statement) { + if (!s) { + return serd_statement_get_subject(statement); + } else if (!p) { + return serd_statement_get_predicate(statement); + } else if (!o) { + return serd_statement_get_object(statement); + } + } + + return NULL; +} + +const SerdStatement* +serd_model_get_statement(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if ((bool)s + (bool)p + (bool)o != 2) { + return NULL; + } + + SerdIter* const i = serd_model_find(model, s, p, o, g); + if (i && i->cur) { + const SerdStatement* statement = serd_iter_get(i); + serd_iter_free(i); + return statement; + } + + return NULL; +} + +size_t +serd_model_count(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdRange* const range = serd_model_range(model, s, p, o, g); + const size_t count = serd_range_size(range); + serd_range_free(range); + return count; +} + +bool +serd_model_ask(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdIter* iter = serd_model_find(model, s, p, o, g); + bool ret = (iter != NULL); + serd_iter_free(iter); + return ret; +} + +static SerdCursor* +serd_model_intern_cursor(SerdModel* model, const SerdCursor* cursor) +{ + if (cursor) { + SerdCursor* copy = (SerdCursor*)calloc(1, sizeof(SerdCursor)); + + copy->file = serd_nodes_intern(model->world->nodes, cursor->file); + copy->line = cursor->line; + copy->col = cursor->col; + return copy; + } + + return NULL; +} + +SerdStatus +serd_model_add_internal(SerdModel* model, + const SerdCursor* cursor, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdStatement* statement = (SerdStatement*)calloc(1, sizeof(SerdStatement)); + + statement->nodes[0] = s; + statement->nodes[1] = p; + statement->nodes[2] = o; + statement->nodes[3] = g; + statement->cursor = serd_model_intern_cursor(model, cursor); + + SERD_WRITE_LOG("Add " TUP_FMT "\n", TUP_FMT_ARGS(statement->nodes)); + + bool added = false; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (model->indices[i]) { + if (!zix_btree_insert(model->indices[i], statement)) { + added = true; + } else if (i == GSPO) { + break; // Statement already indexed + } + } + } + + ++model->version; + if (added) { + return SERD_SUCCESS; + } + + serd_model_drop_statement(model, statement); + return SERD_FAILURE; +} + +SerdStatus +serd_model_add(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if (!s || !p || !o) { + return serd_world_errorf(model->world, + SERD_ERR_BAD_ARG, + "attempt to add statement with NULL field\n"); + } + + return serd_model_add_internal(model, + NULL, + serd_nodes_intern(model->world->nodes, s), + serd_nodes_intern(model->world->nodes, p), + serd_nodes_intern(model->world->nodes, o), + serd_nodes_intern(model->world->nodes, g)); +} + +SerdStatus +serd_model_insert(SerdModel* model, const SerdStatement* statement) +{ + return serd_model_add_internal(model, + serd_statement_get_cursor(statement), + serd_statement_get_subject(statement), + serd_statement_get_predicate(statement), + serd_statement_get_object(statement), + serd_statement_get_graph(statement)); +} + +SerdStatus +serd_model_add_range(SerdModel* model, SerdRange* range) +{ + SerdStatus st = SERD_SUCCESS; + for (; !st && !serd_range_empty(range); serd_range_next(range)) { + st = serd_model_insert(model, serd_range_front(range)); + } + + return st; +} + +SerdStatus +serd_model_erase(SerdModel* model, SerdIter* iter) +{ + const SerdStatement* statement = serd_iter_get(iter); + + SERD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(statement->nodes)); + + SerdStatement* removed = NULL; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (model->indices[i]) { + zix_btree_remove(model->indices[i], + statement, + (void**)&removed, + i == iter->order ? &iter->cur : NULL); + } + } + serd_iter_scan_next(iter); + + serd_model_drop_statement(model, removed); + iter->version = ++model->version; + + return SERD_SUCCESS; +} + +SerdStatus +serd_model_erase_range(SerdModel* model, SerdRange* range) +{ + while (!serd_range_empty(range)) { + serd_model_erase(model, range->begin); + } + + return SERD_SUCCESS; +} diff --git a/src/model.h b/src/model.h new file mode 100644 index 00000000..23151b63 --- /dev/null +++ b/src/model.h @@ -0,0 +1,53 @@ +/* + Copyright 2011-2018 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_MODEL_H +#define SERD_MODEL_H + +#include "serd/serd.h" +#include "zix/btree.h" + +#include <stddef.h> +#include <stdint.h> + +#define SERD_NUM_ORDERS 12 + +struct SerdModelImpl +{ + SerdWorld* world; + + SerdIndexOptions index_options; + SerdModelFlags flags; + + /** Index for each possible triple ordering (may or may not exist). + * Each index is a tree of SordQuad with the appropriate ordering. + */ + ZixBTree* indices[SERD_NUM_ORDERS]; + + SerdIter* end; + + uint64_t version; +}; + +SerdStatus +serd_model_add_internal(SerdModel* model, + const SerdCursor* cursor, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +#endif // SERD_MODEL_H @@ -65,6 +65,14 @@ serd_node_pad_size(const size_t n_bytes) } static const SerdNode* +serd_node_maybe_get_meta_c(const SerdNode* node) +{ + return (node->flags & (SERD_HAS_LANGUAGE | SERD_HAS_DATATYPE)) + ? (node + 1 + (serd_node_pad_size(node->n_bytes) / serd_node_align)) + : NULL; +} + +static const SerdNode* serd_node_get_meta_c(const SerdNode* node) { return node + 1 + (serd_node_pad_size(node->n_bytes) / serd_node_align); @@ -301,6 +309,33 @@ serd_node_equals(const SerdNode* a, const SerdNode* b) return false; } +int +serd_node_compare(const SerdNode* a, const SerdNode* b) +{ + if (a == b) { + return 0; + } else if (!a || !b) { + return (a < b) ? -1 : (a > b) ? 1 : 0; + } else if (a->type != b->type) { + return a->type - b->type; + } + + const int cmp = strcmp(serd_node_get_string(a), serd_node_get_string(b)); + if (cmp) { + return cmp; + } + + return serd_node_compare(serd_node_maybe_get_meta_c(a), + serd_node_maybe_get_meta_c(b)); +} + +/** Compare nodes, considering NULL a wildcard match. */ +int +serd_node_wildcard_compare(const SerdNode* a, const SerdNode* b) +{ + return (!a || !b) ? 0 : serd_node_compare(a, b); +} + static size_t serd_uri_string_length(const SerdURI* uri) { @@ -45,5 +45,6 @@ void serd_node_set(SerdNode** dst, const SerdNode* src); size_t serd_node_total_size(const SerdNode* node); void serd_node_zero_pad(SerdNode* node); SerdNode* serd_new_resolved_uri_i(const char* str, const SerdURI* base); +int serd_node_wildcard_compare(const SerdNode* a, const SerdNode* b); #endif // SERD_NODE_H diff --git a/src/range.c b/src/range.c new file mode 100644 index 00000000..f83855c1 --- /dev/null +++ b/src/range.c @@ -0,0 +1,248 @@ +/* + Copyright 2011-2018 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "range.h" + +#include "iter.h" +#include "log.h" +#include "model.h" +#include "sink.h" +#include "world.h" + +#include <stdlib.h> +#include <string.h> + +typedef enum { NAMED, ANON_S, ANON_O, LIST_S, LIST_O } NodeStyle; + +static SerdStatus +write_range_statement(const SerdSink* sink, + const SerdModel* model, + unsigned depth, + SerdStatementFlags flags, + const SerdStatement* statement); + +SerdRange* +serd_range_new(SerdIter* const begin, SerdIter* const end) +{ + SerdRange* range = (SerdRange*)malloc(sizeof(SerdRange)); + + range->begin = begin; + range->end = end; + + return range; +} + +SerdRange* +serd_range_copy(const SerdRange* range) +{ + SerdRange* copy = (SerdRange*)malloc(sizeof(SerdRange)); + + memcpy(copy, range, sizeof(SerdRange)); + copy->begin = serd_iter_copy(range->begin); + copy->end = serd_iter_copy(range->end); + + return copy; +} + +void +serd_range_free(SerdRange* range) +{ + serd_iter_free(range->begin); + serd_iter_free(range->end); + free(range); +} + +const SerdStatement* +serd_range_front(const SerdRange* range) +{ + return serd_iter_get(range->begin); +} + +bool +serd_range_equals(const SerdRange* lhs, const SerdRange* rhs) +{ + return (!lhs && !rhs) || + (lhs && rhs && serd_iter_equals(lhs->begin, rhs->begin) && + serd_iter_equals(lhs->end, rhs->end)); +} + +bool +serd_range_next(SerdRange* range) +{ + return serd_iter_next(range->begin); +} + +size_t +serd_range_size(const SerdRange* range) +{ + SerdRange* r = serd_range_copy(range); + uint64_t n = 0; + for (; !serd_range_empty(r); serd_range_next(r)) { + ++n; + } + serd_range_free(r); + return n; +} + +bool +serd_range_empty(const SerdRange* range) +{ + return serd_iter_equals(range->begin, range->end); +} + +const SerdIter* +serd_range_begin(const SerdRange* range) +{ + return range->begin; +} + +const SerdIter* +serd_range_end(const SerdRange* range) +{ + return range->end; +} + +static NodeStyle +get_node_style(const SerdModel* model, const SerdNode* object) +{ + const size_t num_as_object = + serd_model_count(model, NULL, NULL, object, NULL); + if (serd_node_get_type(object) != SERD_BLANK || num_as_object > 1) { + return NAMED; // Non-blank and/or blank referred to several times + } + + if (serd_model_ask(model, object, model->world->rdf_first, NULL, NULL) && + serd_model_ask(model, object, model->world->rdf_rest, NULL, NULL)) { + return num_as_object == 0 ? LIST_S : LIST_O; + } + + return num_as_object == 0 ? ANON_S : ANON_O; +} + +static SerdStatus +write_range_internal(const SerdSink* sink, + const unsigned depth, + const SerdStatementFlags flags, + SerdRange* range) +{ + SerdStatus st = SERD_SUCCESS; + const SerdModel* model = range && range->begin ? range->begin->model : NULL; + for (; !st && !serd_range_empty(range); serd_range_next(range)) { + st = write_range_statement( + sink, model, depth, 0, serd_range_front(range)); + } + + return st; +} + +static SerdStatus +write_list(const SerdSink* sink, + const SerdModel* model, + const unsigned depth, + const SerdNode* object) +{ + SerdStatus st = SERD_SUCCESS; + const SerdWorld* world = model->world; + const SerdNode* first = world->rdf_first; + const SerdNode* rest = world->rdf_rest; + const SerdNode* nil = world->rdf_nil; + + SerdIter* f = serd_model_find(model, object, first, NULL, NULL); + while (!st && f && !serd_node_equals(object, nil)) { + const SerdStatement* fs = serd_iter_get(f); + st = write_range_statement(sink, model, depth + 1, 0, fs); + serd_iter_free(f); + + SerdIter* const r = serd_model_find(model, object, rest, NULL, NULL); + const SerdNode* next = + r ? serd_statement_get_object(serd_iter_get(r)) : NULL; + + f = next ? serd_model_find(model, next, first, NULL, NULL) : NULL; + if (r && f) { + // This and next node are good, write rdf:next statement + st = serd_sink_write_statement(sink, 0, serd_iter_get(r)); + object = next; + } else { + // Terminate malformed list + st = serd_sink_write_nodes( + sink, 0, object, rest, nil, serd_statement_get_graph(fs)); + f = NULL; + } + + serd_iter_free(r); + } + + return st; +} + +static SerdStatus +write_range_statement(const SerdSink* sink, + const SerdModel* model, + const unsigned depth, + SerdStatementFlags flags, + const SerdStatement* statement) +{ + const SerdNode* subject = serd_statement_get_subject(statement); + const NodeStyle subject_style = get_node_style(model, subject); + + if (depth == 0 && (subject_style == ANON_O || subject_style == LIST_O)) { + return SERD_SUCCESS; // Skip subject that will be inlined somewhere + } else if (subject_style == ANON_S) { + flags |= SERD_EMPTY_S; // Write anonymous subjects in "[] p o" style + } + + const SerdNode* object = serd_statement_get_object(statement); + const NodeStyle object_style = get_node_style(model, object); + SerdStatus st = SERD_SUCCESS; + if (object_style == ANON_O) { + flags |= SERD_ANON_O_BEGIN; + + SerdRange* subrange = serd_model_range(model, object, NULL, NULL, NULL); + st = st ? st : serd_sink_write_statement(sink, flags, statement); + st = st ? st : write_range_internal(sink, depth + 1, 0, subrange); + st = st ? st : serd_sink_end(sink, object); + serd_range_free(subrange); + + return st; + } else if (object_style == LIST_O) { + flags |= SERD_LIST_O_BEGIN; + st = st ? st : serd_sink_write_statement(sink, flags, statement); + st = st ? st : write_list(sink, model, depth + 1, object); + return st; + } + + return serd_sink_write_statement(sink, flags, statement); +} + +SerdStatus +serd_range_serialise(const SerdRange* range, + const SerdSink* sink, + const SerdSerialisationFlags flags) +{ + SerdStatus st = SERD_SUCCESS; + SerdRange* const copy = serd_range_copy(range); + + if (flags & SERD_NO_INLINE_OBJECTS) { + for (; !st && !serd_range_empty(copy); serd_range_next(copy)) { + st = sink->statement(sink->handle, 0, serd_range_front(copy)); + } + } else { + st = write_range_internal(sink, 0, 0, copy); + } + + serd_range_free(copy); + return st; +} diff --git a/src/range.h b/src/range.h new file mode 100644 index 00000000..d07741ef --- /dev/null +++ b/src/range.h @@ -0,0 +1,36 @@ +/* + Copyright 2011-2018 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_RANGE_H +#define SERD_RANGE_H + +#include "iter.h" +#include "statement.h" + +#include "serd/serd.h" + +struct SerdRangeImpl { + SerdIter* begin; ///< Iterator to first statement in range + SerdIter* end; ///< Iterator to end of range +}; + +SerdRange* +serd_range_new(SerdIter* begin, SerdIter* end); + +bool +serd_range_scan_next(SerdRange* range); + +#endif // SERD_RANGE_H diff --git a/src/serdi.c b/src/serdi.c index 6259a6bf..2bef5e4a 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -51,10 +51,12 @@ print_usage(const char* name, bool error) fprintf(os, " -b Fast bulk output for large serialisations.\n"); fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); fprintf(os, " -e Eat input one character at a time.\n"); + fprintf(os, " -f Fast serialisation without object inlining\n"); fprintf(os, " -h Display this help and exit.\n"); fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); fprintf(os, " -k BYTES Parser stack size.\n"); fprintf(os, " -l Lax (non-strict) parsing.\n"); + fprintf(os, " -m Build and serialise a model (no streaming).\n"); fprintf(os, " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"); fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); fprintf(os, " -q Suppress all output except data.\n"); @@ -91,7 +93,9 @@ main(int argc, char** argv) bool ascii = false; bool bulk_read = true; bool bulk_write = false; + bool no_inline = false; bool lax = false; + bool use_model = false; bool quiet = false; long stack_size = 4194304; const char* add_prefix = NULL; @@ -108,10 +112,14 @@ main(int argc, char** argv) bulk_write = true; } else if (argv[a][1] == 'e') { bulk_read = false; + } else if (argv[a][1] == 'f') { + no_inline = true; } else if (argv[a][1] == 'h') { return print_usage(argv[0], false); } else if (argv[a][1] == 'l') { lax = true; + } else if (argv[a][1] == 'm') { + use_model = true; } else if (argv[a][1] == 'q') { quiet = true; } else if (argv[a][1] == 'v') { @@ -191,6 +199,9 @@ main(int argc, char** argv) const SerdStyleFlags output_style = (ascii ? SERD_STYLE_ASCII : 0); + const SerdSerialisationFlags serialisation_flags = + no_inline ? SERD_NO_INLINE_OBJECTS : 0; + SerdByteSink* byte_sink = serd_byte_sink_new( (SerdWriteFunc)fwrite, out_fd, bulk_write ? 4096 : 1); @@ -201,9 +212,20 @@ main(int argc, char** argv) (SerdWriteFunc)serd_byte_sink_write, byte_sink); - SerdReader* reader = serd_reader_new( - world, input_syntax, serd_writer_get_sink(writer), stack_size); + SerdReader* reader = NULL; + SerdModel* model = NULL; + SerdInserter* inserter = NULL; + const SerdSink* sink = NULL; + if (use_model) { + const SerdIndexOptions indices = SERD_SPO | (no_inline ? 0 : SERD_OPS); + model = serd_model_new(world, indices, input_has_graphs); + inserter = serd_inserter_new(model, env, NULL); + sink = serd_inserter_get_sink(inserter); + } else { + sink = serd_writer_get_sink(writer); + } + reader = serd_reader_new(world, input_syntax, sink, stack_size); serd_reader_set_strict(reader, !lax); if (quiet) { serd_world_set_error_sink(world, quiet_error_sink, NULL); @@ -239,6 +261,19 @@ main(int argc, char** argv) } serd_reader_finish(reader); + + if (status <= SERD_FAILURE && use_model) { + SerdSink* wsink = serd_writer_get_sink(writer); + serd_env_send_prefixes(env, wsink); + + SerdRange* range = serd_model_all(model); + status = serd_range_serialise(range, wsink, serialisation_flags); + serd_range_free(range); + } + + serd_node_free(input_name); + serd_inserter_free(inserter); + serd_model_free(model); serd_reader_free(reader); serd_writer_free(writer); serd_byte_sink_free(byte_sink); diff --git a/src/statement.c b/src/statement.c index 24440a38..347cfe78 100644 --- a/src/statement.c +++ b/src/statement.c @@ -51,3 +51,35 @@ serd_statement_get_cursor(const SerdStatement* statement) { return statement->cursor; } + +bool +serd_statement_equals(const SerdStatement* a, const SerdStatement* b) +{ + return (serd_node_equals(a->nodes[0], b->nodes[0]) && + serd_node_equals(a->nodes[1], b->nodes[1]) && + serd_node_equals(a->nodes[2], b->nodes[2]) && + serd_node_equals(a->nodes[3], b->nodes[3])); +} + +bool +serd_statement_matches(const SerdStatement* statement, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph) + +{ + return (serd_node_pattern_match(statement->nodes[0], subject) && + serd_node_pattern_match(statement->nodes[1], predicate) && + serd_node_pattern_match(statement->nodes[2], object) && + serd_node_pattern_match(statement->nodes[3], graph)); +} + +bool +serd_statement_matches_quad(const SerdStatement* statement, const SerdQuad quad) +{ + return serd_node_pattern_match(statement->nodes[0], quad[0]) && + serd_node_pattern_match(statement->nodes[1], quad[1]) && + serd_node_pattern_match(statement->nodes[2], quad[2]) && + serd_node_pattern_match(statement->nodes[3], quad[3]); +} diff --git a/src/statement.h b/src/statement.h index 7c8384e9..4a1056f2 100644 --- a/src/statement.h +++ b/src/statement.h @@ -19,6 +19,8 @@ #include "serd/serd.h" +#include <stdbool.h> + /** Quad of nodes (a statement), or a quad pattern. @@ -31,4 +33,14 @@ struct SerdStatementImpl { SerdCursor* cursor; }; +static inline bool +serd_node_pattern_match(const SerdNode* a, const SerdNode* b) +{ + return !a || !b || (a == b) || serd_node_equals(a, b); +} + +bool +serd_statement_matches_quad(const SerdStatement* statement, + const SerdQuad quad); + #endif // SERD_STATEMENT_H diff --git a/src/string.c b/src/string.c index f4df39ff..a1d3a0b5 100644 --- a/src/string.c +++ b/src/string.c @@ -38,6 +38,7 @@ serd_strerror(SerdStatus status) case SERD_ERR_UNKNOWN: return "Unknown error"; case SERD_ERR_BAD_SYNTAX: return "Invalid syntax"; case SERD_ERR_BAD_ARG: return "Invalid argument"; + case SERD_ERR_BAD_ITER: return "Invalid iterator"; case SERD_ERR_NOT_FOUND: return "Not found"; case SERD_ERR_ID_CLASH: return "Blank node ID clash"; case SERD_ERR_BAD_CURIE: return "Invalid CURIE"; diff --git a/src/world.c b/src/world.c index 7baeb4bc..52ef1bce 100644 --- a/src/world.c +++ b/src/world.c @@ -104,6 +104,7 @@ void serd_world_free(SerdWorld* world) { serd_node_free(world->blank_node); + serd_nodes_free(world->nodes); free(world); } diff --git a/src/writer.c b/src/writer.c index 24f00429..446bcb18 100644 --- a/src/writer.c +++ b/src/writer.c @@ -1014,7 +1014,7 @@ serd_writer_free(SerdWriter* writer) free(writer); } -const SerdSink* +SerdSink* serd_writer_get_sink(SerdWriter* writer) { return &writer->iface; diff --git a/src/zix/btree.c b/src/zix/btree.c index 7521086d..a55c4699 100644 --- a/src/zix/btree.c +++ b/src/zix/btree.c @@ -55,6 +55,7 @@ typedef struct { } ZixBTreeIterFrame; struct ZixBTreeIterImpl { + unsigned n_levels; ///< Maximum depth of stack unsigned level; ///< Current level in stack ZixBTreeIterFrame stack[]; ///< Position stack }; @@ -350,7 +351,9 @@ zix_btree_iter_new(const ZixBTree* const t) { const size_t s = t->height * sizeof(ZixBTreeIterFrame); - return (ZixBTreeIter*)calloc(1, sizeof(ZixBTreeIter) + s); + ZixBTreeIter* i = (ZixBTreeIter*)calloc(1, sizeof(ZixBTreeIter) + s); + i->n_levels = t->height; + return i; } ZIX_PRIVATE void @@ -712,12 +715,47 @@ zix_btree_begin(const ZixBTree* const t) return i; } +ZIX_API ZixBTreeIter* +zix_btree_end(const ZixBTree* const t) +{ + return zix_btree_iter_new(t); +} + +ZIX_API ZixBTreeIter* +zix_btree_iter_copy(const ZixBTreeIter* const i) +{ + if (!i) { + return NULL; + } + + const size_t s = i->n_levels * sizeof(ZixBTreeIterFrame); + + ZixBTreeIter* j = (ZixBTreeIter*)malloc(sizeof(ZixBTreeIter) + s); + memcpy(j, i, sizeof(ZixBTreeIter) + s); + return j; +} + ZIX_API bool zix_btree_iter_is_end(const ZixBTreeIter* const i) { return !i || i->stack[0].node == NULL; } +ZIX_API bool +zix_btree_iter_equals(const ZixBTreeIter* const lhs, const ZixBTreeIter* const rhs) +{ + if (zix_btree_iter_is_end(lhs) && zix_btree_iter_is_end(rhs)) { + return true; + } else if (!lhs || !rhs || lhs->n_levels != rhs->n_levels) { + return false; + } + + return !memcmp(lhs, + rhs, + sizeof(ZixBTreeIter) + + lhs->n_levels * sizeof(ZixBTreeIterFrame)); +} + ZIX_API void zix_btree_iter_increment(ZixBTreeIter* const i) { diff --git a/src/zix/btree.h b/src/zix/btree.h index 46daa24a..0f71d1e3 100644 --- a/src/zix/btree.h +++ b/src/zix/btree.h @@ -126,6 +126,27 @@ ZIX_API ZixBTreeIter* zix_btree_begin(const ZixBTree* t); /** + Return an iterator to the end of `t` (one past the last element). + + The returned iterator must be freed with zix_btree_iter_free(). +*/ +ZIX_API ZixBTreeIter* +zix_btree_end(const ZixBTree* t); + +/** + Return a new copy of `i`. +*/ +ZIX_API ZixBTreeIter* +zix_btree_iter_copy(const ZixBTreeIter* const i); + +/** + Return true iff `lhs` is equal to `rhs`. +*/ +ZIX_API bool +zix_btree_iter_equals(const ZixBTreeIter* const lhs, + const ZixBTreeIter* const rhs); + +/** Return true iff `i` is an iterator to the end of its tree. */ ZIX_API bool diff --git a/tests/model_test.c b/tests/model_test.c new file mode 100644 index 00000000..395da7c3 --- /dev/null +++ b/tests/model_test.c @@ -0,0 +1,837 @@ +/* + Copyright 2011-2018 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "serd/serd.h" + +#include "../src/log.h" +#include "test_utils.h" + +#define WILDCARD_NODE NULL + +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define RDF_FIRST NS_RDF "first" +#define RDF_REST NS_RDF "rest" + +static const unsigned n_objects_per = 2; + +typedef const SerdNode* Quad[4]; + +typedef struct +{ + Quad query; + int expected_num_results; +} QueryTest; + +static SerdNode* +uri(SerdWorld* world, int num) +{ + char str[] = "eg:000"; + snprintf(str + 3, 4, "%03d", num); + return serd_new_uri(str); +} + +static int +generate(SerdWorld* world, SerdModel* model, size_t n_quads, SerdNode* graph) +{ + for (size_t i = 0; i < n_quads; ++i) { + int num = (i * n_objects_per) + 1; + + SerdNode* ids[2 + n_objects_per]; + for (unsigned j = 0; j < 2 + n_objects_per; ++j) { + ids[j] = uri(world, num++); + } + + for (unsigned j = 0; j < n_objects_per; ++j) { + if (serd_model_add(model, ids[0], ids[1], ids[2 + j], graph)) { + FAIL("Failed to add quad\n"); + } + } + + for (unsigned j = 0; j < 2 + n_objects_per; ++j) { + serd_node_free(ids[j]); + } + } + + // Add some literals + + // (98 4 "hello") and (98 4 "hello"^^<5>) + SerdNode* hello = serd_new_string("hello"); + SerdNode* hello_gb = serd_new_plain_literal("hello", "en-gb"); + SerdNode* hello_us = serd_new_plain_literal("hello", "en-us"); + SerdNode* hello_t4 = serd_new_typed_literal("hello", uri(world, 4)); + SerdNode* hello_t5 = serd_new_typed_literal("hello", uri(world, 5)); + if (serd_model_add(model, uri(world, 98), uri(world, 4), hello, graph)) { + FAIL("Failed to add untyped literal\n"); + } + if (serd_model_add(model, uri(world, 98), uri(world, 4), hello_t5, graph)) { + FAIL("Failed to add typed literal\n"); + } + + // (96 4 "hello"^^<4>) and (96 4 "hello"^^<5>) + if (serd_model_add(model, uri(world, 96), uri(world, 4), hello_t4, graph)) { + FAIL("Failed to add typed literal\n"); + } + if (serd_model_add(model, uri(world, 96), uri(world, 4), hello_t5, graph)) { + FAIL("Failed to add typed literal\n"); + } + + // (94 5 "hello") and (94 5 "hello"@en-gb) + if (serd_model_add(model, uri(world, 94), uri(world, 5), hello, graph)) { + FAIL("Failed to add literal\n"); + } + if (serd_model_add(model, uri(world, 94), uri(world, 5), hello_gb, graph)) { + FAIL("Failed to add literal with language\n"); + } + + // (92 6 "hello"@en-us) and (92 6 "hello"@en-gb) + if (serd_model_add(model, uri(world, 92), uri(world, 6), hello_us, graph)) { + FAIL("Failed to add literal with language\n"); + } + if (serd_model_add(model, uri(world, 92), uri(world, 6), hello_gb, graph)) { + FAIL("Failed to add literal with language\n"); + } + + // (14 6 "bonjour"@fr) and (14 6 "salut"@fr) + SerdNode* bonjour = serd_new_plain_literal("bonjour", "fr"); + SerdNode* salut = serd_new_plain_literal("salut", "fr"); + if (serd_model_add(model, uri(world, 14), uri(world, 6), bonjour, graph)) { + FAIL("Failed to add literal with language\n"); + } + if (serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)) { + FAIL("Failed to add literal with language\n"); + } + + // Attempt to add duplicates + if (!serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)) { + FAIL("Successfully added duplicate statement\n"); + } + + // Add a blank node subject + SerdNode* ablank = serd_new_blank("ablank"); + if (serd_model_add(model, ablank, uri(world, 6), salut, graph)) { + FAIL("Failed to add blank subject statement\n"); + } + + // Add statement with URI object + if (serd_model_add(model, ablank, uri(world, 6), uri(world, 7), graph)) { + FAIL("Failed to add URI subject statement\n"); + } + + return EXIT_SUCCESS; +} + +static int +test_read(SerdWorld* world, SerdModel* model, SerdNode* g, const size_t n_quads) +{ + SerdIter* iter = serd_model_begin(model); + for (; !serd_iter_equals(iter, serd_model_end(model)); + serd_iter_next(iter)) { + const SerdStatement* statement = serd_iter_get(iter); + if (!serd_statement_get_subject(statement) || + !serd_statement_get_predicate(statement) || + !serd_statement_get_object(statement)) { + FAIL("Iterator points to null node\n"); + } + } + + // Attempt to increment past end + if (!serd_iter_next(iter)) { + FAIL("Successfully incremented past end\n"); + } + + serd_iter_free(iter); + + const char* s = "hello"; + SerdNode* plain_hello = serd_new_string(s); + SerdNode* type4_hello = serd_new_typed_literal(s, uri(world, 4)); + SerdNode* type5_hello = serd_new_typed_literal(s, uri(world, 5)); + SerdNode* gb_hello = serd_new_plain_literal(s, "en-gb"); + SerdNode* us_hello = serd_new_plain_literal(s, "en-us"); + +#define NUM_PATTERNS 18 + + QueryTest patterns[NUM_PATTERNS] = { + { { NULL, NULL, NULL }, (int)(n_quads * n_objects_per) + 12 }, + { { uri(world, 1), WILDCARD_NODE, WILDCARD_NODE }, 2 }, + { { uri(world, 9), uri(world, 9), uri(world, 9) }, 0 }, + { { uri(world, 1), uri(world, 2), uri(world, 4) }, 1 }, + { { uri(world, 3), uri(world, 4), WILDCARD_NODE }, 2 }, + { { WILDCARD_NODE, uri(world, 2), uri(world, 4) }, 1 }, + { { WILDCARD_NODE, WILDCARD_NODE, uri(world, 4) }, 1 }, + { { uri(world, 1), WILDCARD_NODE, WILDCARD_NODE }, 2 }, + { { uri(world, 1), WILDCARD_NODE, uri(world, 4) }, 1 }, + { { WILDCARD_NODE, uri(world, 2), WILDCARD_NODE }, 2 }, + { { uri(world, 98), uri(world, 4), plain_hello }, 1 }, + { { uri(world, 98), uri(world, 4), type5_hello }, 1 }, + { { uri(world, 96), uri(world, 4), type4_hello }, 1 }, + { { uri(world, 96), uri(world, 4), type5_hello }, 1 }, + { { uri(world, 94), uri(world, 5), plain_hello }, 1 }, + { { uri(world, 94), uri(world, 5), gb_hello }, 1 }, + { { uri(world, 92), uri(world, 6), gb_hello }, 1 }, + { { uri(world, 92), uri(world, 6), us_hello }, 1 } + }; + + Quad match = { uri(world, 1), uri(world, 2), uri(world, 4), g }; + if (!serd_model_ask(model, match[0], match[1], match[2], match[3])) { + FAILF("No match for " TUP_FMT "\n", TUP_FMT_ARGS(match)); + } + + Quad nomatch = { uri(world, 1), uri(world, 2), uri(world, 9), g }; + if (serd_model_ask(model, nomatch[0], nomatch[1], nomatch[2], nomatch[3])) { + FAILF("False match for " TUP_FMT "\n", TUP_FMT_ARGS(nomatch)); + } + + if (serd_model_get(model, NULL, NULL, uri(world, 3), g)) { + FAIL("Get *,*,3 succeeded\n"); + } else if (serd_model_get(model, uri(world, 1), uri(world, 99), NULL, g)) { + FAIL("Get 1,2,9 succeeded\n"); + } else if (!serd_node_equals( + serd_model_get( + model, uri(world, 1), uri(world, 2), NULL, g), + uri(world, 3))) { + FAIL("Get 1,2,* != 3\n"); + } else if (!serd_node_equals( + serd_model_get( + model, uri(world, 1), NULL, uri(world, 3), g), + uri(world, 2))) { + FAIL("Get 1,*,3 != 2\n"); + } else if (!serd_node_equals( + serd_model_get( + model, NULL, uri(world, 2), uri(world, 3), g), + uri(world, 1))) { + FAIL("Get *,2,3 != 1\n"); + } + + for (unsigned i = 0; i < NUM_PATTERNS; ++i) { + QueryTest test = patterns[i]; + Quad pat = { test.query[0], test.query[1], test.query[2], g }; + + SerdRange* range = serd_model_range(model, pat[0], pat[1], pat[2], pat[3]); + int num_results = 0; + for (; !serd_range_empty(range); serd_range_next(range)) { + ++num_results; + if (!serd_statement_matches(serd_range_front(range), + pat[0], + pat[1], + pat[2], + pat[3])) { + serd_iter_free(iter); + FAILF("Query result " TUP_FMT " does not match pattern " TUP_FMT + "\n", + STATEMENT_FMT_ARGS(serd_range_front(range)), + TUP_FMT_ARGS(pat)); + } + } + serd_range_free(range); + + if (num_results != test.expected_num_results) { + FAILF("Expected %d results for " TUP_FMT ", got %d\n", + test.expected_num_results, + TUP_FMT_ARGS(pat), + num_results); + } + } + + // Query blank node subject + Quad pat = { serd_new_blank("ablank"), 0, 0 }; + int num_results = 0; + SerdRange* range = serd_model_range(model, pat[0], pat[1], pat[2], pat[3]); + for (; !serd_range_empty(range); serd_range_next(range)) { + ++num_results; + const SerdStatement* statement = serd_range_front(range); + if (!serd_statement_matches( + statement, pat[0], pat[1], pat[2], pat[3])) { + serd_range_free(range); + FAILF("Result for " TUP_FMT " does not match pattern\n", + TUP_FMT_ARGS(pat)); + } + } + serd_range_free(range); + + if (num_results != 2) { + FAIL("Blank node subject query failed\n"); + } + + // Test nested queries + const SerdNode* last_subject = 0; + range = serd_model_range(model, NULL, NULL, NULL, NULL); + for (; !serd_range_empty(range); serd_range_next(range)) { + const SerdStatement* statement = serd_range_front(range); + const SerdNode* subject = serd_statement_get_subject(statement); + if (subject == last_subject) { + continue; + } + + Quad subpat = { subject, 0, 0 }; + SerdRange* subrange = serd_model_range( + model, subpat[0], subpat[1], subpat[2], subpat[3]); + const SerdStatement* substatement = serd_range_front(subrange); + uint64_t num_sub_results = 0; + if (serd_statement_get_subject(substatement) != subject) { + FAIL("Incorrect initial submatch\n"); + } + for (; !serd_range_empty(subrange); serd_range_next(subrange)) { + if (!serd_statement_matches(serd_range_front(subrange), + subpat[0], + subpat[1], + subpat[2], + subpat[3])) { + serd_range_free(range); + serd_range_free(subrange); + FAIL("Nested query result does not match pattern\n"); + } + ++num_sub_results; + } + serd_range_free(subrange); + if (num_sub_results != n_objects_per) { + FAILF("Nested query " TUP_FMT " failed" + " (%d results, expected %d)\n", + TUP_FMT_ARGS(subpat), + num_sub_results, + n_objects_per); + } + + uint64_t count = serd_model_count(model, subject, 0, 0, 0); + if (count != num_sub_results) { + FAILF("Query " TUP_FMT " count %d does not match result count %d\n", + TUP_FMT_ARGS(subpat), + count, + num_sub_results); + } + + last_subject = subject; + } + serd_range_free(range); + + return 0; +} + +static SerdStatus +unexpected_error(void* handle, const SerdError* error) +{ + fprintf(stderr, "error: "); + vfprintf(stderr, error->fmt, *error->args); + return SERD_SUCCESS; +} + +static SerdStatus +expected_error(void* handle, const SerdError* error) +{ + fprintf(stderr, "expected: "); + vfprintf(stderr, error->fmt, *error->args); + return SERD_SUCCESS; +} + +static int +test_free_null(SerdWorld* world, const size_t n_quads) +{ + serd_model_free(NULL); // Shouldn't crash + return 0; +} + +static int +test_get_world(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + if (serd_model_get_world(model) != world) { + FAIL("Model has incorrect world pointer\n"); + } + + serd_model_free(model); + return 0; +} + +static int +test_all_begin(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + SerdRange* all = serd_model_all(model); + SerdIter* begin = serd_model_find(model, NULL, NULL, NULL, NULL); + if (!serd_iter_equals(serd_range_begin(all), begin)) { + FAIL("Model range does not start with begin iterator\n"); + } + + serd_range_free(all); + serd_iter_free(begin); + serd_model_free(model); + return 0; +} + +static int +test_add_null(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + + serd_world_set_error_sink(world, expected_error, NULL); + if (!serd_model_add(model, 0, 0, 0, 0)) { + FAIL("Added NULL tuple\n"); + } else if (!serd_model_add(model, uri(world, 1), 0, 0, 0)) { + FAIL("Added tuple with NULL P and O\n"); + } else if (!serd_model_add(model, uri(world, 1), uri(world, 2), 0, 0)) { + FAIL("Added tuple with NULL O\n"); + } else if (!serd_model_empty(model)) { + FAIL("Model contains invalid statements\n"); + } + + serd_model_free(model); + return 0; +} + +static int +test_add_with_iterator(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + + serd_world_set_error_sink(world, expected_error, NULL); + if (serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0)) { + FAIL("Failed to add statement\n"); + } + + // Add a statement with an active iterator + SerdIter* iter = serd_model_begin(model); + if (serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 4), 0)) { + FAIL("Failed to add statement with active iterator\n"); + } + + // Check that iterator has been invalidated + if (serd_iter_get(iter)) { + FAIL("Successfully dereferenced invalidated iterator\n"); + } else if (!serd_iter_next(iter)) { + FAIL("Successfully incremented invalidated iterator\n"); + } + + serd_iter_free(iter); + serd_model_free(model); + return 0; +} + +static int +test_erase_with_iterator(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + + serd_world_set_error_sink(world, expected_error, NULL); + if (serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0) || + serd_model_add(model, uri(world, 4), uri(world, 5), uri(world, 6), 0)) { + FAIL("Failed to add statements\n"); + } + + // Erase a statement with an active iterator + SerdIter* iter1 = serd_model_begin(model); + SerdIter* iter2 = serd_model_begin(model); + if (serd_model_erase(model, iter1)) { + FAIL("Failed to erase statement\n"); + } + + // Check that erased iterator points to the next statement + if (!serd_statement_matches(serd_iter_get(iter1), + uri(world, 4), + uri(world, 5), + uri(world, 6), + 0)) { + FAIL("Erased iterator was not incremented\n"); + } + + // Check that other iterator has been invalidated + if (serd_iter_get(iter2)) { + FAIL("Successfully dereferenced invalidated iterator\n"); + } else if (!serd_iter_next(iter2)) { + FAIL("Successfully incremented invalidated iterator\n"); + } + + serd_iter_free(iter2); + serd_iter_free(iter1); + serd_model_free(model); + return 0; +} + +static int +test_add_erase(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, true); + + // Add (s p "hello") + SerdNode* s = uri(world, 1); + SerdNode* p = uri(world, 2); + SerdNode* hello = serd_new_string("hello"); + if (serd_model_add(model, s, p, hello, 0)) { + FAIL("Failed to add statement (s p \"hello\")\n"); + } else if (!serd_model_ask(model, s, p, hello, 0)) { + FAIL("Added statement (s p \"hello\") not found\n"); + } + + // Add (s p "hi") + SerdNode* hi = serd_new_string("hi"); + if (serd_model_add(model, s, p, hi, NULL)) { + FAIL("Failed to add statement (s p \"hi\")\n"); + } else if (!serd_model_ask(model, s, p, hi, 0)) { + FAIL("Added statement (s p \"hi\") not found\n"); + } + + // Erase (s p "hi") + SerdIter* iter = serd_model_find(model, s, p, hi, NULL); + if (serd_model_erase(model, iter)) { + FAIL("Failed to erase statement (s p \"hi\")\n"); + } else if (serd_model_size(model) != 1) { + FAILF("Expected 1 node after erasure, not %zu\n", + serd_model_size(model)); + } + serd_iter_free(iter); + + // Check that erased statement can not be found + SerdRange* empty = serd_model_range(model, s, p, hi, NULL); + if (!serd_range_empty(empty)) { + FAIL("Found removed statement (s p \"hi\")\n"); + } + serd_range_free(empty); + + serd_model_free(model); + return 0; +} + +static int +test_erase_all(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + generate(world, model, n_quads, NULL); + + SerdIter* iter = serd_model_begin(model); + while (!serd_iter_equals(iter, serd_model_end(model))) { + const SerdStatus st = serd_model_erase(model, iter); + if (st) { + FAILF("Failed to erase tuple via iterator (%s)\n", + serd_strerror(st)); + } + } + + serd_iter_free(iter); + serd_model_free(model); + return 0; +} + +static int +test_copy(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + generate(world, model, n_quads, NULL); + + SerdModel* copy = serd_model_copy(model); + if (!serd_model_equals(model, copy)) { + FAIL("Model copy does not equal original\n"); + } + + serd_model_free(model); + serd_model_free(copy); + return 0; +} + +static int +test_equals(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + generate(world, model, n_quads, NULL); + serd_model_add( + model, uri(world, 0), uri(world, 1), uri(world, 2), uri(world, 3)); + + CHECK(serd_model_equals(NULL, NULL)); + CHECK(!serd_model_equals(NULL, model)); + CHECK(!serd_model_equals(model, NULL)); + + SerdModel* empty = serd_model_new(world, SERD_SPO, false); + if (serd_model_equals(model, empty)) { + FAIL("Non-empty model equals empty model\n"); + } + + SerdModel* different = serd_model_new(world, SERD_SPO, false); + generate(world, different, n_quads, NULL); + serd_model_add( + different, uri(world, 1), uri(world, 1), uri(world, 2), uri(world, 3)); + + if (serd_model_size(model) != serd_model_size(different)) { + FAIL("Models with same number of statements report different size\n"); + } else if (serd_model_equals(model, different)) { + FAIL("Models with different statements are equal\n"); + } + + serd_model_free(model); + serd_model_free(empty); + serd_model_free(different); + return 0; +} + +static int +test_find_past_end(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + SerdNode* s = uri(world, 1); + SerdNode* p = uri(world, 2); + SerdNode* o = uri(world, 3); + if (serd_model_add(model, s, p, o, 0)) { + FAIL("Failed to add statement\n"); + } else if (!serd_model_ask(model, s, p, o, 0)) { + FAIL("Added statement not found\n"); + } + + SerdNode* huge = uri(world, 999); + if (!serd_range_empty(serd_model_range(model, huge, huge, huge, 0))) { + FAIL("Found statement past end of model\n"); + } + + serd_model_free(model); + return 0; +} + +static int +test_iter_comparison(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, false); + + serd_world_set_error_sink(world, expected_error, NULL); + if (serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0)) { + FAIL("Failed to add statement\n"); + } + + // Add a statement with an active iterator + SerdIter* iter1 = serd_model_begin(model); + SerdIter* iter2 = serd_model_begin(model); + if (!serd_iter_equals(iter1, iter2)) { + FAIL("Begin iterators are not equal\n"); + } + + serd_iter_next(iter1); + if (serd_iter_equals(iter1, iter2)) { + FAIL("Begin iterator equals incremented begin iterator\n"); + } + + const SerdIter* end = serd_model_end(model); + if (!serd_iter_equals(iter1, end)) { + FAIL("Iterator incremented to end does not equal end iterator\n"); + } + + serd_iter_free(iter2); + serd_iter_free(iter1); + serd_model_free(model); + return 0; +} + +static int +test_triple_index_read(SerdWorld* world, const size_t n_quads) +{ + static const char* const index_names[6] = { "spo", "sop", "ops", + "osp", "pso", "pos" }; + + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = serd_model_new(world, (1 << i), false); + generate(world, model, n_quads, 0); + if (test_read(world, model, 0, n_quads)) { + FAILF("Failure reading index `%s'\n", index_names[i]); + } + serd_model_free(model); + } + + return 0; +} + +static int +test_quad_index_read(SerdWorld* world, const size_t n_quads) +{ + static const char* const index_names[6] = { "gspo", "gsop", "gops", + "gosp", "gpso", "gpos" }; + + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = serd_model_new(world, (1 << i), true); + SerdNode* graph = uri(world, 42); + generate(world, model, n_quads, graph); + if (test_read(world, model, graph, n_quads)) { + FAILF("Failure reading index `%s'\n", index_names[i]); + } + serd_model_free(model); + } + + return 0; +} + +static int +test_remove_graph(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, true); + + // Generate a couple of graphs + SerdNode* graph42 = uri(world, 42); + SerdNode* graph43 = uri(world, 43); + generate(world, model, 1, graph42); + generate(world, model, 1, graph43); + + // Remove one graph via range + SerdStatus st; + SerdRange* range = serd_model_range(model, NULL, NULL, NULL, graph43); + if ((st = serd_model_erase_range(model, range))) { + FAILF("Remove range failed (%s)\n", serd_strerror(st)); + } + serd_range_free(range); + + // Erase the first tuple (an element in the default graph) + SerdIter* iter = serd_model_begin(model); + if (serd_model_erase(model, iter)) { + FAIL("Failed to erase begin iterator on non-empty model\n"); + } + serd_iter_free(iter); + + // Ensure only the other graph is left + Quad pat = { 0, 0, 0, graph42 }; + for (iter = serd_model_begin(model); + !serd_iter_equals(iter, serd_model_end(model)); + serd_iter_next(iter)) { + if (!serd_statement_matches( + serd_iter_get(iter), pat[0], pat[1], pat[2], pat[3])) { + FAIL("Graph removal via iteration failed\n"); + } + } + serd_iter_free(iter); + + serd_model_free(model); + return 0; +} + +static int +test_default_graph(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, true); + SerdNode* s = uri(world, 1); + SerdNode* p = uri(world, 2); + SerdNode* o = uri(world, 3); + SerdNode* g1 = uri(world, 101); + SerdNode* g2 = uri(world, 102); + + // Insert the same statement into two graphs + if (serd_model_add(model, s, p, o, g1)) { + FAIL("Failed to insert statement into first graph\n"); + } + if (serd_model_add(model, s, p, o, g2)) { + FAIL("Failed to insert statement into second graph\n"); + } + + // Ensure we only see statement once in the default graph + if (serd_model_count(model, s, p, o, NULL) != 1) { + FAIL("Found duplicate triple in default graph\n"); + } + + serd_model_free(model); + return 0; +} + +static int +test_write_bad_list(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_SPO, true); + SerdNodes* nodes = serd_nodes_new(); + const SerdNode* s = serd_nodes_manage(nodes, serd_new_uri("urn:s")); + const SerdNode* p = serd_nodes_manage(nodes, serd_new_uri("urn:p")); + const SerdNode* list1 = serd_nodes_manage(nodes, serd_new_blank("l1")); + const SerdNode* list2 = serd_nodes_manage(nodes, serd_new_blank("l2")); + const SerdNode* nofirst = serd_nodes_manage(nodes, serd_new_blank("nof")); + const SerdNode* norest = serd_nodes_manage(nodes, serd_new_blank("nor")); + const SerdNode* pfirst = serd_nodes_manage(nodes, serd_new_uri(RDF_FIRST)); + const SerdNode* prest = serd_nodes_manage(nodes, serd_new_uri(RDF_REST)); + const SerdNode* val1 = serd_nodes_manage(nodes, serd_new_string("a")); + const SerdNode* val2 = serd_nodes_manage(nodes, serd_new_string("b")); + + // List where second node has no rdf:first + serd_model_add(model, s, p, list1, NULL); + serd_model_add(model, list1, pfirst, val1, NULL); + serd_model_add(model, list1, prest, nofirst, NULL); + + // List where second node has no rdf:rest + serd_model_add(model, s, p, list2, NULL); + serd_model_add(model, list2, pfirst, val1, NULL); + serd_model_add(model, list2, prest, norest, NULL); + serd_model_add(model, norest, pfirst, val2, NULL); + + SerdBuffer buffer = {NULL, 0}; + SerdEnv* env = serd_env_new(NULL); + SerdWriter* writer = serd_writer_new( + world, SERD_TURTLE, 0, env, serd_buffer_sink, &buffer); + + SerdRange* all = serd_model_all(model); + serd_range_serialise(all, serd_writer_get_sink(writer), 0); + serd_range_free(all); + + serd_writer_finish(writer); + const char* str = serd_buffer_sink_finish(&buffer); + const char* expected = "<urn:s>\n" + " <urn:p> (\n" + " \"a\"\n" + " ) , (\n" + " \"a\"\n" + " \"b\"\n" + " ) .\n"; + + if (strcmp(str, expected)) { + FAIL("Failed to serialise invalid list\n"); + } + + free(buffer.buf); + serd_writer_free(writer); + serd_model_free(model); + serd_env_free(env); + serd_nodes_free(nodes); + return 0; +} + +int +main(int argc, char** argv) +{ + static const size_t n_quads = 300; + + serd_model_free(NULL); // Shouldn't crash + + typedef int (*TestFunc)(SerdWorld*, size_t); + + const TestFunc tests[] = { test_free_null, + test_get_world, + test_all_begin, + test_add_null, + test_add_with_iterator, + test_erase_with_iterator, + test_add_erase, + test_erase_all, + test_copy, + test_equals, + test_find_past_end, + test_iter_comparison, + test_triple_index_read, + test_quad_index_read, + test_remove_graph, + test_default_graph, + test_write_bad_list, + NULL }; + + SerdWorld* world = serd_world_new(); + for (const TestFunc* t = tests; *t; ++t) { + serd_world_set_error_sink(world, unexpected_error, NULL); + if ((*t)(world, n_quads)) { + return EXIT_FAILURE; + } + } + + serd_world_free(world); + return 0; +} @@ -33,6 +33,9 @@ def options(ctx): 'largefile': 'build with large file support on 32-bit systems', 'no-posix': 'do not use POSIX functions, even if present'}) + opt.add_option('--dump', type='string', default='', dest='dump', + help='dump debugging output (iter, search, write, all)') + def configure(conf): autowaf.display_header('Serd Configuration') conf.load('compiler_c', cache=True) @@ -64,6 +67,14 @@ def configure(conf): defines = ['_POSIX_C_SOURCE=200809L'], mandatory = False) + dump = Options.options.dump.split(',') + if 'all' in dump or 'iter' in dump: + conf.define('SERD_DEBUG_ITER', 1) + if 'all' in dump or 'search' in dump: + conf.define('SERD_DEBUG_SEARCH', 1) + if 'all' in dump or 'write' in dump: + conf.define('SERD_DEBUG_WRITE', 1) + autowaf.set_lib_env(conf, 'serd', SERD_VERSION) conf.write_config_header('serd_config.h', remove=False) @@ -79,9 +90,13 @@ lib_source = ['src/base64.c', 'src/byte_source.c', 'src/cursor.c', 'src/env.c', + 'src/inserter.c', + 'src/iter.c', + 'src/model.c', 'src/n3.c', 'src/node.c', 'src/nodes.c', + 'src/range.c', 'src/reader.c', 'src/sink.c', 'src/statement.c', @@ -91,6 +106,7 @@ lib_source = ['src/base64.c', 'src/uri.c', 'src/world.c', 'src/writer.c', + 'src/zix/btree.c', 'src/zix/digest.c', 'src/zix/hash.c'] @@ -153,7 +169,8 @@ def build(bld): ('serd_test', 'tests/serd_test.c'), ('read_chunk_test', 'tests/read_chunk_test.c'), ('nodes_test', 'tests/nodes_test.c'), - ('overflow_test', 'tests/overflow_test.c')]: + ('overflow_test', 'tests/overflow_test.c'), + ('model_test', 'tests/model_test.c')]: bld(features = 'c cprogram', source = prog[1], use = 'libserd_profiled', @@ -292,9 +309,17 @@ def show_diff(from_lines, to_lines, from_filename, to_filename): tofile=os.path.abspath(to_filename)): sys.stderr.write(line) -def check_output(out_filename, check_filename, subst_from='', subst_to=''): +def check_output(out_filename, check_filename, subst_from='', subst_to='', sort=False): if not os.access(out_filename, os.F_OK): Logs.pprint('RED', 'FAIL: output %s is missing' % out_filename) + elif sort: + out_lines = sorted(set(io.open(out_filename, encoding='utf-8').readlines())) + check_lines = sorted(set(io.open(check_filename, encoding='utf-8').readlines())) + if out_lines != check_lines: + show_diff(check_lines, out_lines, check_filename, out_filename) + return False + else: + return True elif not file_equals(check_filename, out_filename, subst_from, subst_to): Logs.pprint('RED', 'FAIL: %s != %s' % (os.path.abspath(out_filename), check_filename)) @@ -412,14 +437,17 @@ def test_suite(ctx, base_uri, testdir, report, isyntax, osyntax, options=''): # Check output against test suite check_uri = model[test][mf + 'result'][0] check_path = file_uri_to_path(check_uri) + sort = '-m' in options result = autowaf.run_test( ctx, APPNAME, - lambda: check_output(action + '.out', check_path), + lambda: check_output(action + '.out', check_path, sort=sort), True, name=str(action) + ' check', quiet=True) # Run round-trip tests - test_thru(ctx, uri, action, check_path, - ' '.join(next(thru_options_iter)), isyntax, osyntax, options, quiet=True) + if not sort: + test_thru(ctx, uri, action, check_path, + ' '.join(next(thru_options_iter)), + isyntax, osyntax, options, quiet=True) # Write test report entry if report is not None: @@ -489,7 +517,8 @@ def test(ctx): ['serd_test', 'read_chunk_test', 'nodes_test', - 'overflow_test'], + 'overflow_test', + 'model_test'], name='Unit') def test_syntax_io(in_name, expected_name, lang): @@ -553,7 +582,8 @@ def test(ctx): 'serdi_static "file://%s/tests/good/manifest.ttl" > /dev/full' % srcdir, 1, name='write_error') - run_test_suites(ctx, '') + for opts in ['', '-m']: + run_test_suites(ctx, opts) autowaf.post_test(ctx, APPNAME) |