From fdb360c094b575b59640206d7eea0772bb9de26d Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sat, 12 May 2018 13:28:47 +0200 Subject: WIP: Add model --- NEWS | 1 + doc/serdi.1 | 11 +- serd/serd.h | 417 +++++++++++++++++++++++++++ src/inserter.c | 100 +++++++ src/iter.c | 206 ++++++++++++++ src/iter.h | 89 ++++++ src/model.c | 642 +++++++++++++++++++++++++++++++++++++++++ src/model.h | 42 +++ src/node.c | 7 + src/node.h | 1 + src/range.c | 255 +++++++++++++++++ src/range.h | 35 +++ src/serdi.c | 41 ++- src/statement.c | 75 +++++ src/statement.h | 12 + src/string.c | 1 + tests/model_test.c | 759 +++++++++++++++++++++++++++++++++++++++++++++++++ tests/sink_test.c | 127 +++++++++ tests/statement_test.c | 81 ++++++ wscript | 48 +++- 20 files changed, 2941 insertions(+), 9 deletions(-) create mode 100644 src/inserter.c create mode 100644 src/iter.c create mode 100644 src/iter.h create mode 100644 src/model.c create mode 100644 src/model.h create mode 100644 src/range.c create mode 100644 src/range.h create mode 100644 tests/model_test.c create mode 100644 tests/sink_test.c create mode 100644 tests/statement_test.c diff --git a/NEWS b/NEWS index 05d7fe8a..48441498 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,7 @@ serd (1.0.0) unstable; * Simplify writer style options * Simplify streaming API and improve pretty printing * Add logging functions to public API + * Add model for storing statements in memory -- David Robillard Sat, 19 Jan 2019 13:31:12 +0100 diff --git a/doc/serdi.1 b/doc/serdi.1 index 166c32de..ca821a4c 100644 --- a/doc/serdi.1 +++ b/doc/serdi.1 @@ -1,4 +1,4 @@ -.TH SERDI 1 "05 Jan 2017" +.TH SERDI 1 "06 Jan 2019" .SH NAME .B serdi \- Read and write RDF syntax @@ -28,6 +28,13 @@ generated immediately as input arrives, rather than waiting until an entire page of input has arrived. With this option serdi uses one page less memory, but will likely be significantly slower. +.TP +\fB\-f\fR +Fast serialisation without object inlining. This affects only model +(non-streaming) serialisation, and disables searching and statement reordering +for pretty printing. Statements will be written in simple sorted order, which +is faster, but may result in less readable output in Turtle or TriG. + .TP \fB\-h\fR Print the command line options. @@ -74,7 +81,7 @@ Display version information and exit. Serdi was written by David Robillard .SH COPYRIGHT -Copyright \(co 2011-2017 David Robillard. +Copyright \(co 2011-2019 David Robillard. .br License: .br diff --git a/serd/serd.h b/serd/serd.h index 02698c7f..6d95ad22 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -73,6 +73,18 @@ typedef struct SerdCursorImpl SerdCursor; /// Lexical environment for relative URIs or CURIEs (base URI and namespaces) typedef struct SerdEnvImpl SerdEnv; +/// An indexed set of statements +typedef struct SerdModelImpl SerdModel; + +/// A statement sink that inserts into a model +typedef struct SerdInserterImpl SerdInserter; + +/// Model Iterator +typedef struct SerdIterImpl SerdIter; + +/// Model Range +typedef struct SerdRangeImpl SerdRange; + /// Streaming parser that reads a text stream and writes to a statement sink typedef struct SerdReaderImpl SerdReader; @@ -92,6 +104,7 @@ typedef enum { SERD_ERR_UNKNOWN, ///< Unknown error SERD_ERR_BAD_SYNTAX, ///< Invalid syntax SERD_ERR_BAD_ARG, ///< Invalid argument + SERD_ERR_BAD_ITER, ///< Use of invalidated iterator SERD_ERR_NOT_FOUND, ///< Not found SERD_ERR_ID_CLASH, ///< Encountered clashing blank node IDs SERD_ERR_BAD_CURIE, ///< Invalid CURIE (e.g. prefix does not exist) @@ -119,6 +132,14 @@ typedef enum { /// Bitwise OR of SerdStatementFlag values typedef uint32_t SerdStatementFlags; +/// Flags that control style for a model serialisation +typedef enum { + SERD_NO_INLINE_OBJECTS = 1 << 0, ///< Disable object inlining +} SerdSerialisationFlag; + +/// Bitwise OR of SerdStatementFlag values +typedef uint32_t SerdSerialisationFlags; + /** Type of a syntactic RDF node @@ -190,6 +211,21 @@ typedef enum { SERD_GRAPH = 3 ///< Graph ("context") } SerdField; +/// Indexing option +typedef enum { + SERD_INDEX_SPO = 1, ///< Subject, Predicate, Object + SERD_INDEX_SOP = 1 << 1, ///< Subject, Object, Predicate + SERD_INDEX_OPS = 1 << 2, ///< Object, Predicate, Subject + SERD_INDEX_OSP = 1 << 3, ///< Object, Subject, Predicate + SERD_INDEX_PSO = 1 << 4, ///< Predicate, Subject, Object + SERD_INDEX_POS = 1 << 5, ///< Predicate, Object, Subject + SERD_INDEX_GRAPHS = 1 << 6, ///< Support multiple graphs in model + SERD_STORE_CURSORS = 1 << 7 ///< Store original cursor of statements +} SerdModelFlag; + +/// Bitwise OR of SerdModelFlag values +typedef uint32_t SerdModelFlags; + /// A syntactic RDF node typedef struct SerdNodeImpl SerdNode; @@ -1249,12 +1285,268 @@ SERD_API void serd_nodes_deref(SerdNodes* nodes, const SerdNode* node); +/** + @} + @name Model + @{ +*/ + +/** + Create a new model + + @param world The world in which to make this model. + + @param flags Model options, including enabled indices, for example `SERD_SPO | + SERD_OPS`. Be sure to enable an index where the most significant node(s) + are not variables in your queries. For example, to make (? P O) queries, + enable either SERD_OPS or SERD_POS. +*/ +SERD_API +SerdModel* +serd_model_new(SerdWorld* world, SerdModelFlags flags); + +/// Return a deep copy of `model` +SERD_API +SerdModel* +serd_model_copy(const SerdModel* model); + +/// Return true iff `a` is equal to `b`, ignoring statement cursor metadata +SERD_API +bool +serd_model_equals(const SerdModel* a, const SerdModel* b); + +/// Close and free `model` +SERD_API +void +serd_model_free(SerdModel* model); + +/// Get the world associated with `model` +SERD_API +SerdWorld* +serd_model_get_world(SerdModel* model); + +/// Get the flags enabled on `model` +SERD_API +SerdModelFlags +serd_model_get_flags(const SerdModel* model); + +/// Return the number of statements stored in `model` +SERD_API +size_t +serd_model_size(const SerdModel* model); + +/// Return true iff there are no statements stored in `model` +SERD_API +bool +serd_model_empty(const SerdModel* model); + +/// Return an iterator to the start of `model` +SERD_API +SerdIter* +serd_model_begin(const SerdModel* model); + +/// Return an iterator to the end of `model` +SERD_API +const SerdIter* +serd_model_end(const SerdModel* model); + +/// Return a range of all statements in `model` +SERD_API +SerdRange* +serd_model_all(const SerdModel* model); + +/** + Search for statements by a quad pattern + + @return An iterator to the first match, or NULL if no matches found. +*/ +SERD_API +SerdIter* +serd_model_find(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Search for statements by a quad pattern + + @return A range containins all matching statements. +*/ +SERD_API +SerdRange* +serd_model_range(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Search for a single node that matches a pattern + + Exactly one of `s`, `p`, `o` must be NULL. + This function is mainly useful for predicates that only have one value. + @return The first matching node, or NULL if no matches are found. +*/ +SERD_API +const SerdNode* +serd_model_get(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Search for a single statement that matches a pattern + + This function is mainly useful for predicates that only have one value. + + @return The first matching statement, or NULL if none are found. +*/ +SERD_API +const SerdStatement* +serd_model_get_statement(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/// Return true iff a statement exists +SERD_API +bool +serd_model_ask(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/// Return the number of matching statements +SERD_API +size_t +serd_model_count(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Add a statement to a model from nodes + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_add(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +/** + Add a statement to a model + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_insert(SerdModel* model, const SerdStatement* statement); + +/** + Add a range of statements to a model + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_add_range(SerdModel* model, SerdRange* range); + +/** + Remove a quad from a model via an iterator + + Calling this function invalidates all iterators on `model` except `iter`. + + @param model The model which `iter` points to. + @param iter Iterator to the element to erase, which is incremented to the + next value on return. +*/ +SERD_API +SerdStatus +serd_model_erase(SerdModel* model, SerdIter* iter); + +/** + Remove a range from a model + + Calling this function invalidates all iterators on `model` except `iter`. + + @param model The model which `range` points to. + @param range Range to erase, which will be empty on return; +*/ +SERD_API +SerdStatus +serd_model_erase_range(SerdModel* model, SerdRange* range); + +/** + @} + @name Inserter + @{ +*/ + +/// Create an inserter for writing statements to a model +SERD_API +SerdInserter* +serd_inserter_new(SerdModel* model, + SerdEnv* env, + const SerdNode* default_graph); + +/// Free an inserter +SERD_API +void +serd_inserter_free(SerdInserter* inserter); + +/// Return a sink interface that adds statements via `inserter` +SERD_API +const SerdSink* +serd_inserter_get_sink(SerdInserter* inserter); + /** @} @name Statement @{ */ +/** + Create a new statement + + Note that, to minimise model overhead, statements do not own their nodes, so + they must have a longer lifetime than the statement for it to be valid. For + statements in models, this is the lifetime of the model. For user-created + statements, the simplest way to handle this is to use `SerdNodes`. + + @param s The subject + @param p The predicate ("key") + @param o The object ("value") + @param g The graph ("context") + @param cursor Optional cursor at the origin of this statement + @return A new statement that must be freed with serd_statement_free() +*/ +SERD_API +SerdStatement* +serd_statement_new(const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g, + const SerdCursor* cursor); + +/// Return a copy of `statement` +SERD_API +SerdStatement* +serd_statement_copy(const SerdStatement* statement); + +/// Free `statement` +SERD_API +void +serd_statement_free(SerdStatement* statement); + /// Return the given node in `statement` SERD_API const SerdNode* @@ -1285,6 +1577,131 @@ SERD_API const SerdCursor* serd_statement_get_cursor(const SerdStatement* statement); +/** + Return true iff `a` is equal to `b`, ignoring statement cursor metadata + + Only returns true if nodes are equivalent, does not perform wildcard matching. +*/ +SERD_API +bool +serd_statement_equals(const SerdStatement* a, const SerdStatement* b); + +/** + Return true iff `statement` matches the given pattern + + The matching rules are the same used for querying: nodes match if they are + equivalent, and NULL acts as a wildcard that matches any node. +*/ +SERD_API +bool +serd_statement_matches(const SerdStatement* statement, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph); + +/** + @} + @name Iterator + @{ +*/ + +/// Return a new copy of `iter` +SERD_API +SerdIter* +serd_iter_copy(const SerdIter* iter); + +/// Return the statement pointed to by `iter` +SERD_API +const SerdStatement* +serd_iter_get(const SerdIter* iter); + +/** + Increment `iter` to point to the next statement + + @return True iff `iter` has reached the end. +*/ +SERD_API +bool +serd_iter_next(SerdIter* iter); + +/// Return true iff `lhs` is equal to `rhs` +SERD_API +bool +serd_iter_equals(const SerdIter* lhs, const SerdIter* rhs); + +/// Free `iter` +SERD_API +void +serd_iter_free(SerdIter* iter); + +/** + @} + @name Range + @{ +*/ + +/// Return a new copy of `range` +SERD_API +SerdRange* +serd_range_copy(const SerdRange* range); + +/// Free `range` +SERD_API +void +serd_range_free(SerdRange* range); + +/// Return the first statement in `range`, or NULL if `range` is empty +SERD_API +const SerdStatement* +serd_range_front(const SerdRange* range); + +/// Return true iff `lhs` is equal to `rhs` +SERD_API +bool +serd_range_equals(const SerdRange* lhs, const SerdRange* rhs); + +/// Increment the start of `range` to point to the next statement +SERD_API +bool +serd_range_next(SerdRange* range); + +/// Return the number of statements in `range` +SERD_API +size_t +serd_range_size(const SerdRange* range); + +/// Return true iff there are no statements in `range` +SERD_API +bool +serd_range_empty(const SerdRange* range); + +/// Return an iterator to the start of `range` +SERD_API +const SerdIter* +serd_range_begin(const SerdRange* range); + +/// Return an iterator to the end of `range` +SERD_API +const SerdIter* +serd_range_end(const SerdRange* range); + +/** + Write `range` to `sink` + + The serialisation style can be controlled with `flags`. The default is to + write statements in an order suited for pretty-printing with Turtle or TriG + with as many objects written inline as possible. If + `SERD_NO_INLINE_OBJECTS` is given, a simple sorted stream is written + instead, which is significantly faster since no searching is required, but + can result in ugly output for Turtle or Trig. +*/ +SERD_API +SerdStatus +serd_range_serialise(const SerdRange* range, + const SerdSink* sink, + SerdSerialisationFlags flags); + /** @} @name Cursor diff --git a/src/inserter.c b/src/inserter.c new file mode 100644 index 00000000..03ca4e7e --- /dev/null +++ b/src/inserter.c @@ -0,0 +1,100 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "model.h" +#include "sink.h" +#include "statement.h" +#include "world.h" + +#include "serd/serd.h" + +#include + +struct SerdInserterImpl { + SerdModel* model; + SerdNode* default_graph; + SerdSink iface; +}; + +static const SerdNode* +manage_or_intern(SerdNodes* nodes, SerdNode* manage, const SerdNode* intern) +{ + return manage ? serd_nodes_manage(nodes, manage) + : serd_nodes_intern(nodes, intern); +} + +static SerdStatus +serd_inserter_on_statement(SerdInserter* inserter, + const SerdStatementFlags flags, + const SerdStatement* statement) +{ + (void)flags; + + const SerdNode* const subject = serd_statement_get_subject(statement); + const SerdNode* const predicate = serd_statement_get_predicate(statement); + const SerdNode* const object = serd_statement_get_object(statement); + const SerdNode* const graph = serd_statement_get_graph(statement); + + // Attempt to expand all nodes to eliminate CURIEs + SerdNode* const s = serd_env_expand(inserter->iface.env, subject); + SerdNode* const p = serd_env_expand(inserter->iface.env, predicate); + SerdNode* const o = serd_env_expand(inserter->iface.env, object); + SerdNode* const g = serd_env_expand(inserter->iface.env, graph); + + SerdNodes* const nodes = inserter->model->world->nodes; + const SerdNode* model_graph = manage_or_intern(nodes, g, graph); + if (!model_graph) { + model_graph = serd_nodes_intern(nodes, inserter->default_graph); + } + + const SerdStatus st = serd_model_add_internal( + inserter->model, + (inserter->model->flags & SERD_STORE_CURSORS) ? statement->cursor + : NULL, + manage_or_intern(nodes, s, subject), + manage_or_intern(nodes, p, predicate), + manage_or_intern(nodes, o, object), + model_graph); + + return st > SERD_FAILURE ? st : SERD_SUCCESS; +} + +SerdInserter* +serd_inserter_new(SerdModel* model, SerdEnv* env, const SerdNode* default_graph) +{ + SerdInserter* inserter = (SerdInserter*)calloc(1, sizeof(SerdInserter)); + inserter->model = model; + inserter->default_graph = serd_node_copy(default_graph); + inserter->iface.handle = inserter; + inserter->iface.env = env; + inserter->iface.statement = (SerdStatementFunc)serd_inserter_on_statement; + return inserter; +} + +void +serd_inserter_free(SerdInserter* inserter) +{ + if (inserter) { + serd_node_free(inserter->default_graph); + free(inserter); + } +} + +const SerdSink* +serd_inserter_get_sink(SerdInserter* inserter) +{ + return &inserter->iface; +} diff --git a/src/iter.c b/src/iter.c new file mode 100644 index 00000000..87c9dfaf --- /dev/null +++ b/src/iter.c @@ -0,0 +1,206 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "iter.h" + +#include "model.h" +#include "statement.h" +#include "world.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include +#include +#include + +static bool +serd_iter_pattern_matches(const SerdIter* iter, const SerdQuad pat) +{ + const SerdStatement* key = (const SerdStatement*)zix_btree_get(iter->cur); + for (int i = 0; i < iter->n_prefix; ++i) { + const int idx = orderings[iter->order][i]; + if (!serd_node_pattern_match(key->nodes[idx], pat[idx])) { + return false; + } + } + + return true; +} + +/** + Seek forward as necessary until `iter` points at a matching statement. + + @return true iff iterator reached end of valid range. +*/ +static bool +serd_iter_seek_match(SerdIter* iter, const SerdQuad pat) +{ + for (; !zix_btree_iter_is_end(iter->cur); + zix_btree_iter_increment(iter->cur)) { + const SerdStatement* s = (const SerdStatement*)zix_btree_get(iter->cur); + if (serd_statement_matches_quad(s, pat)) { + return false; // Found match + } else if (iter->mode == FILTER_RANGE && + !serd_iter_pattern_matches(iter, pat)) { + zix_btree_iter_free(iter->cur); + iter->cur = NULL; + return true; // Reached end of range + } + } + + assert(zix_btree_iter_is_end(iter->cur)); + return true; // Reached end of index +} + +static bool +check_version(const SerdIter* const iter) +{ + if (iter->version != iter->model->version) { + serd_world_errorf(iter->model->world, + SERD_ERR_BAD_ITER, + "attempt to use invalidated iterator\n"); + return false; + } + return true; +} + +SerdIter* +serd_iter_new(const SerdModel* model, + ZixBTreeIter* cur, + const SerdQuad pat, + SerdOrder order, + SearchMode mode, + int n_prefix) +{ + SerdIter* iter = (SerdIter*)calloc(1, sizeof(SerdIter)); + iter->model = model; + iter->cur = cur; + iter->order = order; + iter->mode = mode; + iter->n_prefix = n_prefix; + iter->version = model->version; + + switch (iter->mode) { + case ALL: + case RANGE: + assert(zix_btree_iter_is_end(iter->cur) || + serd_statement_matches_quad( + ((const SerdStatement*)zix_btree_get(iter->cur)), pat)); + break; + case FILTER_RANGE: + case FILTER_ALL: + serd_iter_seek_match(iter, pat); + break; + } + + // Replace (possibly temporary) nodes in pattern with nodes from the model + if (!zix_btree_iter_is_end(iter->cur)) { + const SerdStatement* s = (const SerdStatement*)zix_btree_get(iter->cur); + for (int i = 0; i < TUP_LEN; ++i) { + if (pat[i]) { + iter->pat[i] = s->nodes[i]; + } + } + } + + return iter; +} + +SerdIter* +serd_iter_copy(const SerdIter* iter) +{ + if (!iter) { + return NULL; + } + + SerdIter* copy = (SerdIter*)malloc(sizeof(SerdIter)); + memcpy(copy, iter, sizeof(SerdIter)); + copy->cur = zix_btree_iter_copy(iter->cur); + return copy; +} + +const SerdStatement* +serd_iter_get(const SerdIter* iter) +{ + return check_version(iter) ? (const SerdStatement*)zix_btree_get(iter->cur) + : NULL; +} + +bool +serd_iter_scan_next(SerdIter* iter) +{ + if (zix_btree_iter_is_end(iter->cur)) { + return true; + } + + switch (iter->mode) { + case ALL: + // At the end if the cursor is (assigned above) + break; + case RANGE: + // At the end if the MSNs no longer match + if (!serd_iter_pattern_matches(iter, iter->pat)) { + zix_btree_iter_free(iter->cur); + iter->cur = NULL; + } + break; + case FILTER_RANGE: + case FILTER_ALL: + // Seek forward to next match + serd_iter_seek_match(iter, iter->pat); + break; + } + + return zix_btree_iter_is_end(iter->cur); +} + +bool +serd_iter_next(SerdIter* iter) +{ + if (zix_btree_iter_is_end(iter->cur) || !check_version(iter)) { + return true; + } + + zix_btree_iter_increment(iter->cur); + return serd_iter_scan_next(iter); +} + +bool +serd_iter_equals(const SerdIter* lhs, const SerdIter* rhs) +{ + if (!lhs && !rhs) { + return true; + } + + return (lhs && rhs && lhs->model == rhs->model && + zix_btree_iter_equals(lhs->cur, rhs->cur) && + serd_node_pattern_match(lhs->pat[0], rhs->pat[0]) && + serd_node_pattern_match(lhs->pat[1], rhs->pat[1]) && + serd_node_pattern_match(lhs->pat[2], rhs->pat[2]) && + serd_node_pattern_match(lhs->pat[3], rhs->pat[3]) && + lhs->order == rhs->order && lhs->mode == rhs->mode && + lhs->n_prefix == rhs->n_prefix); +} + +void +serd_iter_free(SerdIter* iter) +{ + if (iter) { + zix_btree_iter_free(iter->cur); + free(iter); + } +} diff --git a/src/iter.h b/src/iter.h new file mode 100644 index 00000000..13acca64 --- /dev/null +++ b/src/iter.h @@ -0,0 +1,89 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_ITER_H +#define SERD_ITER_H + +#include "statement.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include +#include + +/** Triple ordering */ +typedef enum { + SPO, ///< Subject, Predicate, Object + SOP, ///< Subject, Object, Predicate + OPS, ///< Object, Predicate, Subject + OSP, ///< Object, Subject, Predicate + PSO, ///< Predicate, Subject, Object + POS, ///< Predicate, Object, Subject + GSPO, ///< Graph, Subject, Predicate, Object + GSOP, ///< Graph, Subject, Object, Predicate + GOPS, ///< Graph, Object, Predicate, Subject + GOSP, ///< Graph, Object, Subject, Predicate + GPSO, ///< Graph, Predicate, Subject, Object + GPOS ///< Graph, Predicate, Object, Subject +} SerdOrder; + +/** Mode for searching or iteration */ +typedef enum { + ALL, ///< Iterate over entire store + RANGE, ///< Iterate over range with equal prefix + FILTER_RANGE, ///< Iterate over range with equal prefix, filtering + FILTER_ALL ///< Iterate to end of store, filtering +} SearchMode; + +struct SerdIterImpl { + const SerdModel* model; ///< Model being iterated over + ZixBTreeIter* cur; ///< Current DB cursor + SerdQuad pat; ///< Pattern (in ordering order) + SerdOrder order; ///< Store order (which index) + SearchMode mode; ///< Iteration mode + int n_prefix; ///< Prefix for RANGE and FILTER_RANGE + uint64_t version; ///< Model version when iterator was created +}; + +#define NUM_ORDERS 12 +#define TUP_LEN 4 + +/** + Quads of indices for each order, from most to least significant + (array indexed by SordOrder) +*/ +static const int orderings[NUM_ORDERS][TUP_LEN] = { + { 0, 1, 2, 3 }, { 0, 2, 1, 3 }, // SPO, SOP + { 2, 1, 0, 3 }, { 2, 0, 1, 3 }, // OPS, OSP + { 1, 0, 2, 3 }, { 1, 2, 0, 3 }, // PSO, POS + { 3, 0, 1, 2 }, { 3, 0, 2, 1 }, // GSPO, GSOP + { 3, 2, 1, 0 }, { 3, 2, 0, 1 }, // GOPS, GOSP + { 3, 1, 0, 2 }, { 3, 1, 2, 0 } // GPSO, GPOS +}; + +SerdIter* serd_iter_new(const SerdModel* model, + ZixBTreeIter* cur, + const SerdQuad pat, + SerdOrder order, + SearchMode mode, + int n_prefix); + +bool serd_iter_scan_next(SerdIter* iter); + +bool serd_quad_match(const SerdQuad x, const SerdQuad y); + +#endif // SERD_ITER_H diff --git a/src/model.c b/src/model.c new file mode 100644 index 00000000..15234e45 --- /dev/null +++ b/src/model.c @@ -0,0 +1,642 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "model.h" + +#include "cursor.h" +#include "iter.h" +#include "node.h" +#include "range.h" +#include "statement.h" +#include "world.h" + +#include "zix/btree.h" +#include "zix/common.h" + +#include +#include +#include +#include + +#define DEFAULT_ORDER SPO +#define DEFAULT_GRAPH_ORDER GSPO + +/** + Compare quads lexicographically, ignoring graph. + + NULL IDs (equal to 0) are treated as wildcards, always less than every + other possible ID, except itself. +*/ +static int +serd_triple_compare(const void* x, const void* y, void* user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (int i = 0; i < TUP_LEN; ++i) { + const int o = ordering[i]; + if (o != SERD_GRAPH) { + const int cmp = + serd_node_wildcard_compare(s->nodes[o], t->nodes[o]); + if (cmp) { + return cmp; + } + } + } + + return 0; +} + +/** + Compare quads lexicographically, with exact (non-wildcard) graph matching. +*/ +static int +serd_quad_compare(const void* x, const void* y, void* user_data) +{ + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + // Compare graph without wildcard matching + const int cmp = serd_node_compare( + s->nodes[SERD_GRAPH], t->nodes[SERD_GRAPH]); + + return cmp ? cmp : serd_triple_compare(x, y, user_data); +} + +/** + Return true iff `serd` has an index for `order`. + If `graphs` is true, `order` will be modified to be the + corresponding order with a G prepended (so G will be the MSN). +*/ +static inline bool +serd_model_has_index(const SerdModel* model, + SerdOrder* order, + int* n_prefix, + bool graphs) +{ + if (graphs) { + *order = (SerdOrder)(*order + GSPO); + *n_prefix += 1; + } + + return model->indices[*order]; +} + +/** + Return the best available index for a pattern. + @param pat Pattern in standard (S P O G) order + @param mode Set to the (best) iteration mode for iterating over results + @param n_prefix Set to the length of the range prefix + (for `mode` == RANGE and `mode` == FILTER_RANGE) +*/ +static SerdOrder +serd_model_best_index(const SerdModel* model, + const SerdQuad pat, + SearchMode* mode, + int* n_prefix) +{ + const bool graph_search = (pat[SERD_GRAPH] != 0); + + const unsigned sig = ((pat[0] ? 1 : 0) * 0x100 + + (pat[1] ? 1 : 0) * 0x010 + + (pat[2] ? 1 : 0) * 0x001); + + SerdOrder good[2] = { (SerdOrder)-1, (SerdOrder)-1 }; + +#define PAT_CASE(sig, m, g0, g1, np) \ + case sig: \ + *mode = m; \ + good[0] = g0; \ + good[1] = g1; \ + *n_prefix = np; \ + break + + // Good orderings that don't require filtering + *mode = RANGE; + *n_prefix = 0; + switch (sig) { + case 0x000: + assert(graph_search); + *mode = RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + case 0x111: + *mode = RANGE; + *n_prefix = graph_search ? 4 : 3; + return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER; + + PAT_CASE(0x001, RANGE, OPS, OSP, 1); + PAT_CASE(0x010, RANGE, POS, PSO, 1); + PAT_CASE(0x011, RANGE, OPS, POS, 2); + PAT_CASE(0x100, RANGE, SPO, SOP, 1); + PAT_CASE(0x101, RANGE, SOP, OSP, 2); + PAT_CASE(0x110, RANGE, SPO, PSO, 2); + } + + if (*mode == RANGE) { + if (serd_model_has_index(model, &good[0], n_prefix, graph_search)) { + return good[0]; + } else if (serd_model_has_index( + model, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + // Not so good orderings that require filtering, but can + // still be constrained to a range + switch (sig) { + PAT_CASE(0x011, FILTER_RANGE, OSP, PSO, 1); + PAT_CASE(0x101, FILTER_RANGE, SPO, OPS, 1); + // SPO is always present, so 0x110 is never reached here + default: break; + } + + if (*mode == FILTER_RANGE) { + if (serd_model_has_index(model, &good[0], n_prefix, graph_search)) { + return good[0]; + } else if (serd_model_has_index( + model, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + if (graph_search) { + *mode = FILTER_RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + } else { + *mode = FILTER_ALL; + return DEFAULT_ORDER; + } +} + +SerdModel* +serd_model_new(SerdWorld* world, SerdModelFlags flags) +{ + SerdModel* model = (SerdModel*)calloc(1, sizeof(struct SerdModelImpl)); + + flags |= SERD_INDEX_SPO; // SPO index is mandatory + + model->world = world; + model->flags = flags; + + for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) { + const int* const ordering = orderings[i]; + const int* const g_ordering = orderings[i + (NUM_ORDERS / 2)]; + + if (flags & (1 << i)) { + model->indices[i] = + zix_btree_new((ZixComparator)serd_triple_compare, + (const void*)ordering, + NULL); + if (flags & SERD_INDEX_GRAPHS) { + model->indices[i + (NUM_ORDERS / 2)] = + zix_btree_new((ZixComparator)serd_quad_compare, + (const void*)g_ordering, + NULL); + } + } + } + + // Create end iterator + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + ZixBTreeIter* cur = zix_btree_end(model->indices[order]); + const SerdQuad pat = { 0, 0, 0, 0 }; + + model->end = serd_iter_new(model, cur, pat, order, ALL, 0); + + return model; +} + +SerdModel* +serd_model_copy(const SerdModel* model) +{ + if (!model) { + return NULL; + } + + SerdModel* copy = serd_model_new(model->world, model->flags); + + SerdRange* all = serd_model_all(model); + serd_model_add_range(copy, all); + serd_range_free(all); + + assert(serd_model_size(model) == serd_model_size(copy)); + return copy; +} + +SERD_API +bool +serd_model_equals(const SerdModel* a, const SerdModel* b) +{ + if (!a && !b) { + return true; + } else if (!a || !b || serd_model_size(a) != serd_model_size(b)) { + return false; + } + + SerdRange* ra = serd_model_all(a); + SerdRange* rb = serd_model_all(b); + + bool result = true; + while (!serd_range_empty(ra) && !serd_range_empty(rb)) { + if (!serd_statement_equals(serd_range_front(ra), + serd_range_front(rb))) { + result = false; + break; + } + + serd_range_next(ra); + serd_range_next(rb); + } + + result = result && serd_range_empty(ra) && serd_range_empty(rb); + + serd_range_free(ra); + serd_range_free(rb); + return result; +} + +static void +serd_model_drop_statement(SerdModel* model, SerdStatement* statement) + +{ + for (int i = 0; i < TUP_LEN; ++i) { + if (statement->nodes[i]) { + serd_nodes_deref(model->world->nodes, statement->nodes[i]); + } + } + + if (statement->cursor && statement->cursor->file) { + serd_nodes_deref(model->world->nodes, statement->cursor->file); + } + + serd_statement_free(statement); +} + +void +serd_model_free(SerdModel* model) +{ + if (!model) { + return; + } + + serd_iter_free(model->end); + + ZixBTree* index = model->indices[model->indices[DEFAULT_GRAPH_ORDER] + ? DEFAULT_GRAPH_ORDER + : DEFAULT_ORDER]; + + // Free statements + ZixBTreeIter* t = zix_btree_begin(index); + for (; !zix_btree_iter_is_end(t); zix_btree_iter_increment(t)) { + serd_model_drop_statement(model, (SerdStatement*)zix_btree_get(t)); + } + zix_btree_iter_free(t); + + // Free indices + for (unsigned o = 0; o < NUM_ORDERS; ++o) { + if (model->indices[o]) { + zix_btree_free(model->indices[o]); + } + } + + free(model); +} + +SerdWorld* +serd_model_get_world(SerdModel* model) +{ + return model->world; +} + +SerdModelFlags +serd_model_get_flags(const SerdModel* model) +{ + return model->flags; +} + +size_t +serd_model_size(const SerdModel* model) +{ + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + return zix_btree_size(model->indices[order]); +} + +bool +serd_model_empty(const SerdModel* model) +{ + return serd_model_size(model) == 0; +} + +SerdIter* +serd_model_begin(const SerdModel* model) +{ + if (serd_model_size(model) == 0) { + return serd_iter_copy(serd_model_end(model)); + } else { + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + ZixBTreeIter* cur = zix_btree_begin(model->indices[order]); + const SerdQuad pat = { 0, 0, 0, 0 }; + return serd_iter_new(model, cur, pat, order, ALL, 0); + } +} + +const SerdIter* +serd_model_end(const SerdModel* model) +{ + return model->end; +} + +SerdRange* +serd_model_all(const SerdModel* model) +{ + return serd_range_new(serd_model_begin(model), + serd_iter_copy(serd_model_end(model))); +} + +SerdIter* +serd_model_find(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + const SerdQuad pat = { s, p, o, g }; + if (!pat[0] && !pat[1] && !pat[2] && !pat[3]) { + return serd_model_begin(model); + } + + SearchMode mode; + int n_prefix; + const SerdOrder index_order = + serd_model_best_index(model, pat, &mode, &n_prefix); + + ZixBTree* const db = model->indices[index_order]; + ZixBTreeIter* cur = NULL; + + if (mode == FILTER_ALL) { + // No prefix shared with an index at all, linear search (worst case) + cur = zix_btree_begin(db); + } else if (mode == FILTER_RANGE) { + /* Some prefix, but filtering still required. Build a search pattern + with only the prefix to find the lower bound in log time. */ + SerdQuad prefix_pat = { NULL, NULL, NULL, NULL }; + const int* const ordering = orderings[index_order]; + for (int i = 0; i < n_prefix; ++i) { + prefix_pat[ordering[i]] = pat[ordering[i]]; + } + zix_btree_lower_bound(db, prefix_pat, &cur); + } else { + // Ideal case, pattern matches an index with no filtering required + zix_btree_lower_bound(db, pat, &cur); + } + + if (zix_btree_iter_is_end(cur)) { + zix_btree_iter_free(cur); + return NULL; + } + + const SerdStatement* const key = (const SerdStatement*)zix_btree_get(cur); + if (!key || (mode == RANGE && !serd_statement_matches_quad(key, pat))) { + zix_btree_iter_free(cur); + return NULL; + } + + return serd_iter_new(model, cur, pat, index_order, mode, n_prefix); +} + +SerdRange* +serd_model_range(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if (!s && !p && !o && !g) { + return serd_range_new(serd_model_begin(model), + serd_iter_copy(serd_model_end(model))); + } + + SerdIter* begin = serd_model_find(model, s, p, o, g); + SerdIter* end = begin ? serd_iter_new(model, + NULL, + begin->pat, + begin->order, + begin->mode, + begin->n_prefix) + : NULL; + return serd_range_new(begin, end); +} + +const SerdNode* +serd_model_get(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + const SerdStatement* statement = + serd_model_get_statement(model, s, p, o, g); + + if (statement) { + if (!s) { + return serd_statement_get_subject(statement); + } else if (!p) { + return serd_statement_get_predicate(statement); + } else if (!o) { + return serd_statement_get_object(statement); + } + } + + return NULL; +} + +const SerdStatement* +serd_model_get_statement(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if ((bool)s + (bool)p + (bool)o != 2) { + return NULL; + } + + SerdIter* const i = serd_model_find(model, s, p, o, g); + if (i && i->cur) { + const SerdStatement* statement = serd_iter_get(i); + serd_iter_free(i); + return statement; + } + + return NULL; +} + +size_t +serd_model_count(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdRange* const range = serd_model_range(model, s, p, o, g); + const size_t count = serd_range_size(range); + serd_range_free(range); + return count; +} + +bool +serd_model_ask(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdIter* iter = serd_model_find(model, s, p, o, g); + bool ret = (iter != NULL); + serd_iter_free(iter); + return ret; +} + +static SerdCursor* +serd_model_intern_cursor(SerdModel* model, const SerdCursor* cursor) +{ + if (cursor) { + SerdCursor* copy = (SerdCursor*)calloc(1, sizeof(SerdCursor)); + + copy->file = serd_nodes_intern(model->world->nodes, cursor->file); + copy->line = cursor->line; + copy->col = cursor->col; + return copy; + } + + return NULL; +} + +SerdStatus +serd_model_add_internal(SerdModel* model, + const SerdCursor* cursor, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdStatement* statement = (SerdStatement*)calloc(1, sizeof(SerdStatement)); + + statement->nodes[0] = s; + statement->nodes[1] = p; + statement->nodes[2] = o; + statement->nodes[3] = g; + statement->cursor = serd_model_intern_cursor(model, cursor); + + bool added = false; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (model->indices[i]) { + if (!zix_btree_insert(model->indices[i], statement)) { + added = true; + } else if (i == GSPO) { + break; // Statement already indexed + } + } + } + + ++model->version; + if (added) { + return SERD_SUCCESS; + } + + serd_model_drop_statement(model, statement); + return SERD_FAILURE; +} + +SerdStatus +serd_model_add(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if (!s || !p || !o) { + return serd_world_errorf(model->world, + SERD_ERR_BAD_ARG, + "attempt to add statement with NULL field\n"); + } + + return serd_model_add_internal(model, + NULL, + serd_nodes_intern(model->world->nodes, s), + serd_nodes_intern(model->world->nodes, p), + serd_nodes_intern(model->world->nodes, o), + serd_nodes_intern(model->world->nodes, g)); +} + +SerdStatus +serd_model_insert(SerdModel* model, const SerdStatement* statement) +{ + SerdNodes* nodes = model->world->nodes; + return serd_model_add_internal( + model, + serd_statement_get_cursor(statement), + serd_nodes_intern(nodes, serd_statement_get_subject(statement)), + serd_nodes_intern(nodes, serd_statement_get_predicate(statement)), + serd_nodes_intern(nodes, serd_statement_get_object(statement)), + serd_nodes_intern(nodes, serd_statement_get_graph(statement))); +} + +SerdStatus +serd_model_add_range(SerdModel* model, SerdRange* range) +{ + SerdStatus st = SERD_SUCCESS; + for (; !st && !serd_range_empty(range); serd_range_next(range)) { + st = serd_model_insert(model, serd_range_front(range)); + } + + return st; +} + +SerdStatus +serd_model_erase(SerdModel* model, SerdIter* iter) +{ + const SerdStatement* statement = serd_iter_get(iter); + + SerdStatement* removed = NULL; + for (int i = SPO; i <= GPOS; ++i) { + if (model->indices[i]) { + zix_btree_remove(model->indices[i], + statement, + (void**)&removed, + i == (int)iter->order ? &iter->cur : NULL); + } + } + serd_iter_scan_next(iter); + + serd_model_drop_statement(model, removed); + iter->version = ++model->version; + + return SERD_SUCCESS; +} + +SerdStatus +serd_model_erase_range(SerdModel* model, SerdRange* range) +{ + while (!serd_range_empty(range)) { + serd_model_erase(model, range->begin); + } + + return SERD_SUCCESS; +} diff --git a/src/model.h b/src/model.h new file mode 100644 index 00000000..02300483 --- /dev/null +++ b/src/model.h @@ -0,0 +1,42 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_MODEL_H +#define SERD_MODEL_H + +#include "serd/serd.h" +#include "zix/btree.h" + +#include + +struct SerdModelImpl +{ + SerdWorld* world; ///< World this model is a part of + SerdModelFlags flags; ///< Active indices and features + ZixBTree* indices[12]; ///< Trees of SordQuad + SerdIter* end; ///< End iterator (always the same) + uint64_t version; ///< Version incremented on every change +}; + +SerdStatus +serd_model_add_internal(SerdModel* model, + const SerdCursor* cursor, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +#endif // SERD_MODEL_H diff --git a/src/node.c b/src/node.c index 2482109b..682bb42a 100644 --- a/src/node.c +++ b/src/node.c @@ -327,6 +327,13 @@ serd_node_compare(const SerdNode* a, const SerdNode* b) serd_node_maybe_get_meta_c(b)); } +/** Compare nodes, considering NULL a wildcard match. */ +int +serd_node_wildcard_compare(const SerdNode* a, const SerdNode* b) +{ + return (!a || !b) ? 0 : serd_node_compare(a, b); +} + static size_t serd_uri_string_length(const SerdURI* uri) { diff --git a/src/node.h b/src/node.h index 53936ca3..afe3d93f 100644 --- a/src/node.h +++ b/src/node.h @@ -46,5 +46,6 @@ void serd_node_set(SerdNode** dst, const SerdNode* src); size_t serd_node_total_size(const SerdNode* node); void serd_node_zero_pad(SerdNode* node); SerdNode* serd_new_resolved_uri_i(const char* str, const SerdURI* base); +int serd_node_wildcard_compare(const SerdNode* a, const SerdNode* b); #endif // SERD_NODE_H diff --git a/src/range.c b/src/range.c new file mode 100644 index 00000000..c9d6f54b --- /dev/null +++ b/src/range.c @@ -0,0 +1,255 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "range.h" + +#include "iter.h" +#include "model.h" +#include "world.h" + +#include +#include +#include + +typedef enum { NAMED, ANON_S, ANON_O, LIST_S, LIST_O } NodeStyle; + +static SerdStatus +write_range_statement(const SerdSink* sink, + const SerdModel* model, + unsigned depth, + SerdStatementFlags flags, + const SerdStatement* statement); + +SerdRange* +serd_range_new(SerdIter* const begin, SerdIter* const end) +{ + SerdRange* range = (SerdRange*)malloc(sizeof(SerdRange)); + + range->begin = begin; + range->end = end; + + return range; +} + +SerdRange* +serd_range_copy(const SerdRange* range) +{ + SerdRange* copy = (SerdRange*)malloc(sizeof(SerdRange)); + + memcpy(copy, range, sizeof(SerdRange)); + copy->begin = serd_iter_copy(range->begin); + copy->end = serd_iter_copy(range->end); + + return copy; +} + +void +serd_range_free(SerdRange* range) +{ + serd_iter_free(range->begin); + serd_iter_free(range->end); + free(range); +} + +const SerdStatement* +serd_range_front(const SerdRange* range) +{ + return serd_iter_get(range->begin); +} + +bool +serd_range_equals(const SerdRange* lhs, const SerdRange* rhs) +{ + return (!lhs && !rhs) || + (lhs && rhs && serd_iter_equals(lhs->begin, rhs->begin) && + serd_iter_equals(lhs->end, rhs->end)); +} + +bool +serd_range_next(SerdRange* range) +{ + return serd_iter_next(range->begin); +} + +size_t +serd_range_size(const SerdRange* range) +{ + SerdRange* r = serd_range_copy(range); + uint64_t n = 0; + for (; !serd_range_empty(r); serd_range_next(r)) { + ++n; + } + serd_range_free(r); + return n; +} + +bool +serd_range_empty(const SerdRange* range) +{ + return !range || serd_iter_equals(range->begin, range->end); +} + +const SerdIter* +serd_range_begin(const SerdRange* range) +{ + return range->begin; +} + +const SerdIter* +serd_range_end(const SerdRange* range) +{ + return range->end; +} + +static NodeStyle +get_node_style(const SerdModel* model, const SerdNode* object) +{ + const size_t num_as_object = + serd_model_count(model, NULL, NULL, object, NULL); + if (serd_node_get_type(object) != SERD_BLANK || num_as_object > 1) { + return NAMED; // Non-blank and/or blank referred to several times + } + + if (serd_model_ask(model, object, model->world->rdf_first, NULL, NULL) && + serd_model_ask(model, object, model->world->rdf_rest, NULL, NULL)) { + return num_as_object == 0 ? LIST_S : LIST_O; + } + + return num_as_object == 0 ? ANON_S : ANON_O; +} + +static SerdStatus +write_range_internal(const SerdSink* sink, + const unsigned depth, + const SerdStatementFlags flags, + const SerdModel* model, + SerdRange* range) +{ + (void)flags; + + SerdStatus st = SERD_SUCCESS; + for (; !st && !serd_range_empty(range); serd_range_next(range)) { + st = write_range_statement( + sink, model, depth, 0, serd_range_front(range)); + } + + return st; +} + +static SerdStatus +write_list(const SerdSink* sink, + const SerdModel* model, + const unsigned depth, + const SerdNode* object) +{ + SerdStatus st = SERD_SUCCESS; + const SerdWorld* world = model->world; + const SerdNode* first = world->rdf_first; + const SerdNode* rest = world->rdf_rest; + const SerdNode* nil = world->rdf_nil; + + SerdIter* f = serd_model_find(model, object, first, NULL, NULL); + while (!st && f && !serd_node_equals(object, nil)) { + const SerdStatement* fs = serd_iter_get(f); + st = write_range_statement(sink, model, depth + 1, 0, fs); + serd_iter_free(f); + if (st) { + break; + } + + SerdIter* const r = serd_model_find(model, object, rest, NULL, NULL); + const SerdNode* next = + r ? serd_statement_get_object(serd_iter_get(r)) : NULL; + + f = next ? serd_model_find(model, next, first, NULL, NULL) : NULL; + if (r && f) { + // This and next node are good, write rdf:next statement + st = serd_sink_write_statement(sink, 0, serd_iter_get(r)); + object = next; + } else { + // Terminate malformed list + st = serd_sink_write( + sink, 0, object, rest, nil, serd_statement_get_graph(fs)); + f = NULL; + } + + serd_iter_free(r); + } + + return st; +} + +static SerdStatus +write_range_statement(const SerdSink* sink, + const SerdModel* model, + const unsigned depth, + SerdStatementFlags flags, + const SerdStatement* statement) +{ + const SerdNode* subject = serd_statement_get_subject(statement); + const NodeStyle subject_style = get_node_style(model, subject); + + if (depth == 0 && (subject_style == ANON_O || subject_style == LIST_O)) { + return SERD_SUCCESS; // Skip subject that will be inlined somewhere + } else if (subject_style == ANON_S) { + flags |= SERD_EMPTY_S; // Write anonymous subjects in "[] p o" style + } + + const SerdNode* object = serd_statement_get_object(statement); + const NodeStyle object_style = get_node_style(model, object); + SerdStatus st = SERD_SUCCESS; + if (object_style == ANON_O) { + flags |= SERD_ANON_O; + + SerdRange* range = serd_model_range(model, object, NULL, NULL, NULL); + st = st ? st : serd_sink_write_statement(sink, flags, statement); + st = st ? st : write_range_internal(sink, depth + 1, 0, model, range); + st = st ? st : serd_sink_write_end(sink, object); + serd_range_free(range); + + return st; + } else if (object_style == LIST_O) { + flags |= SERD_LIST_O; + st = st ? st : serd_sink_write_statement(sink, flags, statement); + st = st ? st : write_list(sink, model, depth + 1, object); + return st; + } + + return serd_sink_write_statement(sink, flags, statement); +} + +SerdStatus +serd_range_serialise(const SerdRange* range, + const SerdSink* sink, + const SerdSerialisationFlags flags) +{ + SerdStatus st = SERD_SUCCESS; + if (serd_range_empty(range)) { + return st; + } + + SerdRange* const copy = serd_range_copy(range); + if (flags & SERD_NO_INLINE_OBJECTS) { + for (; !st && !serd_range_empty(copy); serd_range_next(copy)) { + st = serd_sink_write_statement(sink, 0, serd_range_front(copy)); + } + } else { + st = write_range_internal(sink, 0, 0, range->begin->model, copy); + } + + serd_range_free(copy); + return st; +} diff --git a/src/range.h b/src/range.h new file mode 100644 index 00000000..a53f8c53 --- /dev/null +++ b/src/range.h @@ -0,0 +1,35 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_RANGE_H +#define SERD_RANGE_H + +#include "serd/serd.h" + +#include + +struct SerdRangeImpl { + SerdIter* begin; ///< Iterator to first statement in range + SerdIter* end; ///< Iterator to end of range +}; + +SerdRange* +serd_range_new(SerdIter* begin, SerdIter* end); + +bool +serd_range_scan_next(SerdRange* range); + +#endif // SERD_RANGE_H diff --git a/src/serdi.c b/src/serdi.c index 4fc9bfaf..9a1a1848 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -55,10 +55,12 @@ print_usage(const char* name, bool error) fprintf(os, " -b Fast bulk output for large serialisations.\n"); fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); fprintf(os, " -e Eat input one character at a time.\n"); + fprintf(os, " -f Fast serialisation without object inlining\n"); fprintf(os, " -h Display this help and exit.\n"); fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); fprintf(os, " -k BYTES Parser stack size.\n"); fprintf(os, " -l Lax (non-strict) parsing.\n"); + fprintf(os, " -m Build and serialise a model (no streaming).\n"); fprintf(os, " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"); fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); fprintf(os, " -q Suppress all output except data.\n"); @@ -97,7 +99,9 @@ main(int argc, char** argv) bool ascii = false; bool bulk_read = true; bool bulk_write = false; + bool no_inline = false; bool lax = false; + bool use_model = false; bool quiet = false; size_t stack_size = 4194304; const char* add_prefix = NULL; @@ -114,10 +118,14 @@ main(int argc, char** argv) bulk_write = true; } else if (argv[a][1] == 'e') { bulk_read = false; + } else if (argv[a][1] == 'f') { + no_inline = true; } else if (argv[a][1] == 'h') { return print_usage(argv[0], false); } else if (argv[a][1] == 'l') { lax = true; + } else if (argv[a][1] == 'm') { + use_model = true; } else if (argv[a][1] == 'q') { quiet = true; } else if (argv[a][1] == 'v') { @@ -204,6 +212,9 @@ main(int argc, char** argv) const SerdWriterFlags writer_flags = (ascii ? SERD_STYLE_ASCII : 0); + const SerdSerialisationFlags serialisation_flags = + no_inline ? SERD_NO_INLINE_OBJECTS : 0; + SerdByteSink* byte_sink = serd_byte_sink_new( (SerdWriteFunc)fwrite, out_fd, bulk_write ? 4096 : 1); @@ -214,9 +225,22 @@ main(int argc, char** argv) (SerdWriteFunc)serd_byte_sink_write, byte_sink); - SerdReader* reader = serd_reader_new( - world, input_syntax, serd_writer_get_sink(writer), stack_size); + SerdReader* reader = NULL; + SerdModel* model = NULL; + SerdInserter* inserter = NULL; + const SerdSink* sink = NULL; + if (use_model) { + const SerdModelFlags flags = + SERD_INDEX_SPO | (input_has_graphs ? SERD_INDEX_GRAPHS : 0) | + (no_inline ? 0 : SERD_INDEX_OPS); + model = serd_model_new(world, flags); + inserter = serd_inserter_new(model, env, NULL); + sink = serd_inserter_get_sink(inserter); + } else { + sink = serd_writer_get_sink(writer); + } + reader = serd_reader_new(world, input_syntax, sink, stack_size); serd_reader_set_strict(reader, !lax); if (quiet) { serd_world_set_message_sink(world, quiet_error_sink, NULL); @@ -252,6 +276,19 @@ main(int argc, char** argv) } serd_reader_finish(reader); + + if (status <= SERD_FAILURE && use_model) { + const SerdSink* wsink = serd_writer_get_sink(writer); + serd_env_write_prefixes(env, wsink); + + SerdRange* range = serd_model_all(model); + status = serd_range_serialise(range, wsink, serialisation_flags); + serd_range_free(range); + } + + serd_node_free(input_name); + serd_inserter_free(inserter); + serd_model_free(model); serd_reader_free(reader); serd_writer_free(writer); serd_byte_sink_free(byte_sink); diff --git a/src/statement.c b/src/statement.c index 24440a38..1da83fb1 100644 --- a/src/statement.c +++ b/src/statement.c @@ -16,6 +16,52 @@ #include "statement.h" +#include "cursor.h" + +#include +#include + +SerdStatement* +serd_statement_new(const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g, + const SerdCursor* cursor) +{ + SerdStatement* statement = (SerdStatement*)malloc(sizeof(SerdStatement)); + if (statement) { + statement->nodes[0] = s; + statement->nodes[1] = p; + statement->nodes[2] = o; + statement->nodes[3] = g; + statement->cursor = serd_cursor_copy(cursor); + } + return statement; +} + +SerdStatement* +serd_statement_copy(const SerdStatement* statement) +{ + if (!statement) { + return NULL; + } + + SerdStatement* copy = (SerdStatement*)malloc(sizeof(SerdStatement)); + memcpy(copy, statement, sizeof(SerdStatement)); + if (statement->cursor) { + copy->cursor = (SerdCursor*)malloc(sizeof(SerdCursor)); + memcpy(copy->cursor, statement->cursor, sizeof(SerdCursor)); + } + return copy; +} + +void +serd_statement_free(SerdStatement* statement) +{ + free(statement->cursor); + free(statement); +} + const SerdNode* serd_statement_get_node(const SerdStatement* statement, SerdField field) { @@ -51,3 +97,32 @@ serd_statement_get_cursor(const SerdStatement* statement) { return statement->cursor; } + +bool +serd_statement_equals(const SerdStatement* a, const SerdStatement* b) +{ + return (a == b || (a && b && serd_node_equals(a->nodes[0], b->nodes[0]) && + serd_node_equals(a->nodes[1], b->nodes[1]) && + serd_node_equals(a->nodes[2], b->nodes[2]) && + serd_node_equals(a->nodes[3], b->nodes[3]))); +} + +bool +serd_statement_matches(const SerdStatement* statement, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph) +{ + return (serd_node_pattern_match(statement->nodes[0], subject) && + serd_node_pattern_match(statement->nodes[1], predicate) && + serd_node_pattern_match(statement->nodes[2], object) && + serd_node_pattern_match(statement->nodes[3], graph)); +} + +bool +serd_statement_matches_quad(const SerdStatement* statement, const SerdQuad quad) +{ + return serd_statement_matches( + statement, quad[0], quad[1], quad[2], quad[3]); +} diff --git a/src/statement.h b/src/statement.h index 7c8384e9..4a1056f2 100644 --- a/src/statement.h +++ b/src/statement.h @@ -19,6 +19,8 @@ #include "serd/serd.h" +#include + /** Quad of nodes (a statement), or a quad pattern. @@ -31,4 +33,14 @@ struct SerdStatementImpl { SerdCursor* cursor; }; +static inline bool +serd_node_pattern_match(const SerdNode* a, const SerdNode* b) +{ + return !a || !b || (a == b) || serd_node_equals(a, b); +} + +bool +serd_statement_matches_quad(const SerdStatement* statement, + const SerdQuad quad); + #endif // SERD_STATEMENT_H diff --git a/src/string.c b/src/string.c index d6f98388..10cd537a 100644 --- a/src/string.c +++ b/src/string.c @@ -39,6 +39,7 @@ serd_strerror(SerdStatus status) case SERD_ERR_UNKNOWN: return "Unknown error"; case SERD_ERR_BAD_SYNTAX: return "Invalid syntax"; case SERD_ERR_BAD_ARG: return "Invalid argument"; + case SERD_ERR_BAD_ITER: return "Invalid iterator"; case SERD_ERR_NOT_FOUND: return "Not found"; case SERD_ERR_ID_CLASH: return "Blank node ID clash"; case SERD_ERR_BAD_CURIE: return "Invalid CURIE"; diff --git a/tests/model_test.c b/tests/model_test.c new file mode 100644 index 00000000..952f8026 --- /dev/null +++ b/tests/model_test.c @@ -0,0 +1,759 @@ +/* + Copyright 2011-2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#undef NDEBUG + +#include +#include +#include +#include +#include + +#include "serd/serd.h" + +#define WILDCARD_NODE NULL + +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define RDF_FIRST NS_RDF "first" +#define RDF_REST NS_RDF "rest" + +#define N_OBJECTS_PER 2U + +typedef const SerdNode* Quad[4]; + +typedef struct +{ + Quad query; + int expected_num_results; +} QueryTest; + +static const SerdNode* +manage(SerdWorld* world, SerdNode* node) +{ + return serd_nodes_manage(serd_world_get_nodes(world), node); +} + +static const SerdNode* +uri(SerdWorld* world, const size_t num) +{ + char str[] = "eg:000"; + snprintf(str + 3, 4, "%03zu", num); + return manage(world, serd_new_uri(str)); +} + +static int +generate(SerdWorld* world, + SerdModel* model, + size_t n_quads, + const SerdNode* graph) +{ + SerdNodes* nodes = serd_world_get_nodes(world); + + for (size_t i = 0; i < n_quads; ++i) { + size_t num = (i * N_OBJECTS_PER) + 1U; + + const SerdNode* ids[2 + N_OBJECTS_PER]; + for (size_t j = 0; j < 2 + N_OBJECTS_PER; ++j) { + ids[j] = uri(world, num++); + } + + for (size_t j = 0; j < N_OBJECTS_PER; ++j) { + assert(!serd_model_add(model, ids[0], ids[1], ids[2 + j], graph)); + } + } + + // Add some literals + + // (98 4 "hello") and (98 4 "hello"^^<5>) + const SerdNode* hello = manage(world, serd_new_string("hello")); + const SerdNode* hello_gb = + manage(world, serd_new_plain_literal("hello", "en-gb")); + const SerdNode* hello_us = + manage(world, serd_new_plain_literal("hello", "en-us")); + const SerdNode* hello_t4 = serd_nodes_manage( + nodes, serd_new_typed_literal("hello", uri(world, 4))); + const SerdNode* hello_t5 = serd_nodes_manage( + nodes, serd_new_typed_literal("hello", uri(world, 5))); + assert(!serd_model_add(model, uri(world, 98), uri(world, 4), hello, graph)); + assert(!serd_model_add( + model, uri(world, 98), uri(world, 4), hello_t5, graph)); + + // (96 4 "hello"^^<4>) and (96 4 "hello"^^<5>) + assert(!serd_model_add( + model, uri(world, 96), uri(world, 4), hello_t4, graph)); + assert(!serd_model_add( + model, uri(world, 96), uri(world, 4), hello_t5, graph)); + + // (94 5 "hello") and (94 5 "hello"@en-gb) + assert(!serd_model_add(model, uri(world, 94), uri(world, 5), hello, graph)); + assert(!serd_model_add( + model, uri(world, 94), uri(world, 5), hello_gb, graph)); + + // (92 6 "hello"@en-us) and (92 6 "hello"@en-gb) + assert(!serd_model_add( + model, uri(world, 92), uri(world, 6), hello_us, graph)); + assert(!serd_model_add( + model, uri(world, 92), uri(world, 6), hello_gb, graph)); + + // (14 6 "bonjour"@fr) and (14 6 "salut"@fr) + const SerdNode* bonjour = + manage(world, serd_new_plain_literal("bonjour", "fr")); + const SerdNode* salut = + manage(world, serd_new_plain_literal("salut", "fr")); + assert(!serd_model_add( + model, uri(world, 14), uri(world, 6), bonjour, graph)); + assert(!serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)); + + // Attempt to add duplicates + assert(serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)); + + // Add a blank node subject + const SerdNode* ablank = manage(world, serd_new_blank("ablank")); + assert(!serd_model_add(model, ablank, uri(world, 6), salut, graph)); + + // Add statement with URI object + assert(!serd_model_add(model, ablank, uri(world, 6), uri(world, 7), graph)); + + return EXIT_SUCCESS; +} + +static int +test_read(SerdWorld* world, + SerdModel* model, + const SerdNode* g, + const size_t n_quads) +{ + SerdIter* iter = serd_model_begin(model); + const SerdStatement* prev = NULL; + for (; !serd_iter_equals(iter, serd_model_end(model)); + serd_iter_next(iter)) { + const SerdStatement* statement = serd_iter_get(iter); + assert(serd_statement_get_subject(statement)); + assert(serd_statement_get_predicate(statement)); + assert(serd_statement_get_object(statement)); + assert(!serd_statement_equals(statement, prev)); + assert(!serd_statement_equals(prev, statement)); + prev = statement; + } + + // Attempt to increment past end + assert(serd_iter_next(iter)); + serd_iter_free(iter); + + const char* s = "hello"; + const SerdNode* plain_hello = manage(world, serd_new_string(s)); + const SerdNode* type4_hello = + manage(world, serd_new_typed_literal(s, uri(world, 4))); + const SerdNode* type5_hello = + manage(world, serd_new_typed_literal(s, uri(world, 5))); + const SerdNode* gb_hello = + manage(world, serd_new_plain_literal(s, "en-gb")); + const SerdNode* us_hello = + manage(world, serd_new_plain_literal(s, "en-us")); + +#define NUM_PATTERNS 18 + + QueryTest patterns[NUM_PATTERNS] = { + {{NULL, NULL, NULL}, (int)(n_quads * N_OBJECTS_PER) + 12}, + {{uri(world, 1), WILDCARD_NODE, WILDCARD_NODE}, 2}, + {{uri(world, 9), uri(world, 9), uri(world, 9)}, 0}, + {{uri(world, 1), uri(world, 2), uri(world, 4)}, 1}, + {{uri(world, 3), uri(world, 4), WILDCARD_NODE}, 2}, + {{WILDCARD_NODE, uri(world, 2), uri(world, 4)}, 1}, + {{WILDCARD_NODE, WILDCARD_NODE, uri(world, 4)}, 1}, + {{uri(world, 1), WILDCARD_NODE, WILDCARD_NODE}, 2}, + {{uri(world, 1), WILDCARD_NODE, uri(world, 4)}, 1}, + {{WILDCARD_NODE, uri(world, 2), WILDCARD_NODE}, 2}, + {{uri(world, 98), uri(world, 4), plain_hello}, 1}, + {{uri(world, 98), uri(world, 4), type5_hello}, 1}, + {{uri(world, 96), uri(world, 4), type4_hello}, 1}, + {{uri(world, 96), uri(world, 4), type5_hello}, 1}, + {{uri(world, 94), uri(world, 5), plain_hello}, 1}, + {{uri(world, 94), uri(world, 5), gb_hello}, 1}, + {{uri(world, 92), uri(world, 6), gb_hello}, 1}, + {{uri(world, 92), uri(world, 6), us_hello}, 1}}; + + Quad match = {uri(world, 1), uri(world, 2), uri(world, 4), g}; + assert(serd_model_ask(model, match[0], match[1], match[2], match[3])); + + Quad nomatch = {uri(world, 1), uri(world, 2), uri(world, 9), g}; + assert(!serd_model_ask( + model, nomatch[0], nomatch[1], nomatch[2], nomatch[3])); + + assert(!serd_model_get(model, NULL, NULL, uri(world, 3), g)); + assert(!serd_model_get(model, uri(world, 1), uri(world, 99), NULL, g)); + + assert(serd_node_equals( + serd_model_get(model, uri(world, 1), uri(world, 2), NULL, g), + uri(world, 3))); + assert(serd_node_equals( + serd_model_get(model, uri(world, 1), NULL, uri(world, 3), g), + uri(world, 2))); + assert(serd_node_equals( + serd_model_get(model, NULL, uri(world, 2), uri(world, 3), g), + uri(world, 1))); + + for (unsigned i = 0; i < NUM_PATTERNS; ++i) { + QueryTest test = patterns[i]; + Quad pat = {test.query[0], test.query[1], test.query[2], g}; + + SerdRange* range = + serd_model_range(model, pat[0], pat[1], pat[2], pat[3]); + int num_results = 0; + for (; !serd_range_empty(range); serd_range_next(range)) { + ++num_results; + assert(serd_statement_matches( + serd_range_front(range), pat[0], pat[1], pat[2], pat[3])); + } + serd_range_free(range); + + assert(num_results == test.expected_num_results); + } + + // Query blank node subject + const SerdNode* ablank = manage(world, serd_new_blank("ablank")); + Quad pat = {ablank, 0, 0}; + int num_results = 0; + SerdRange* range = serd_model_range(model, pat[0], pat[1], pat[2], pat[3]); + for (; !serd_range_empty(range); serd_range_next(range)) { + ++num_results; + const SerdStatement* statement = serd_range_front(range); + assert(serd_statement_matches( + statement, pat[0], pat[1], pat[2], pat[3])); + } + serd_range_free(range); + + assert(num_results == 2); + + // Test nested queries + const SerdNode* last_subject = 0; + range = serd_model_range(model, NULL, NULL, NULL, NULL); + for (; !serd_range_empty(range); serd_range_next(range)) { + const SerdStatement* statement = serd_range_front(range); + const SerdNode* subject = serd_statement_get_subject(statement); + if (subject == last_subject) { + continue; + } + + Quad subpat = {subject, 0, 0}; + SerdRange* subrange = serd_model_range( + model, subpat[0], subpat[1], subpat[2], subpat[3]); + const SerdStatement* substatement = serd_range_front(subrange); + uint64_t num_sub_results = 0; + assert(serd_statement_get_subject(substatement) == subject); + for (; !serd_range_empty(subrange); serd_range_next(subrange)) { + assert(serd_statement_matches(serd_range_front(subrange), + subpat[0], + subpat[1], + subpat[2], + subpat[3])); + ++num_sub_results; + } + serd_range_free(subrange); + assert(num_sub_results == N_OBJECTS_PER); + + uint64_t count = serd_model_count(model, subject, 0, 0, 0); + assert(count == num_sub_results); + + last_subject = subject; + } + serd_range_free(range); + + return 0; +} + +static SerdStatus +unexpected_error(void* handle, const SerdMessage* msg) +{ + (void)handle; + + fprintf(stderr, "error: "); + vfprintf(stderr, msg->fmt, *msg->args); + return SERD_SUCCESS; +} + +static SerdStatus +expected_error(void* handle, const SerdMessage* msg) +{ + (void)handle; + + fprintf(stderr, "expected: "); + vfprintf(stderr, msg->fmt, *msg->args); + return SERD_SUCCESS; +} + +static int +test_free_null(SerdWorld* world, const size_t n_quads) +{ + (void)world; + (void)n_quads; + + serd_model_free(NULL); // Shouldn't crash + return 0; +} + +static int +test_get_world(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + assert(serd_model_get_world(model) == world); + serd_model_free(model); + return 0; +} + +static int +test_get_flags(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + const SerdModelFlags flags = SERD_INDEX_OPS | SERD_INDEX_GRAPHS; + SerdModel* model = serd_model_new(world, flags); + assert(serd_model_get_flags(model) == (SERD_INDEX_SPO | flags)); + serd_model_free(model); + return 0; +} + +static int +test_all_begin(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + SerdRange* all = serd_model_all(model); + SerdIter* begin = serd_model_find(model, NULL, NULL, NULL, NULL); + assert(serd_iter_equals(serd_range_begin(all), begin)); + + serd_range_free(all); + serd_iter_free(begin); + serd_model_free(model); + return 0; +} + +static int +test_add_null(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + + serd_world_set_message_sink(world, expected_error, NULL); + + assert(serd_model_add(model, 0, 0, 0, 0)); + assert(serd_model_add(model, uri(world, 1), 0, 0, 0)); + assert(serd_model_add(model, uri(world, 1), uri(world, 2), 0, 0)); + assert(serd_model_empty(model)); + + serd_model_free(model); + return 0; +} + +static int +test_add_with_iterator(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + + serd_world_set_message_sink(world, expected_error, NULL); + assert(!serd_model_add( + model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + + // Add a statement with an active iterator + SerdIter* iter = serd_model_begin(model); + assert(!serd_model_add( + model, uri(world, 1), uri(world, 2), uri(world, 4), 0)); + + // Check that iterator has been invalidated + assert(!serd_iter_get(iter)); + assert(serd_iter_next(iter)); + + serd_iter_free(iter); + serd_model_free(model); + return 0; +} + +static int +test_erase_with_iterator(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + + serd_world_set_message_sink(world, expected_error, NULL); + assert(!serd_model_add( + model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + assert(!serd_model_add( + model, uri(world, 4), uri(world, 5), uri(world, 6), 0)); + + // Erase a statement with an active iterator + SerdIter* iter1 = serd_model_begin(model); + SerdIter* iter2 = serd_model_begin(model); + assert(!serd_model_erase(model, iter1)); + + // Check that erased iterator points to the next statement + assert(serd_statement_matches(serd_iter_get(iter1), + uri(world, 4), + uri(world, 5), + uri(world, 6), + 0)); + + // Check that other iterator has been invalidated + assert(!serd_iter_get(iter2)); + assert(serd_iter_next(iter2)); + + serd_iter_free(iter2); + serd_iter_free(iter1); + serd_model_free(model); + return 0; +} + +static int +test_add_erase(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = + serd_model_new(world, SERD_INDEX_SPO | SERD_INDEX_GRAPHS); + + // Add (s p "hello") + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* hello = manage(world, serd_new_string("hello")); + assert(!serd_model_add(model, s, p, hello, 0)); + assert(serd_model_ask(model, s, p, hello, 0)); + + // Add (s p "hi") + const SerdNode* hi = manage(world, serd_new_string("hi")); + assert(!serd_model_add(model, s, p, hi, NULL)); + assert(serd_model_ask(model, s, p, hi, 0)); + + // Erase (s p "hi") + SerdIter* iter = serd_model_find(model, s, p, hi, NULL); + assert(!serd_model_erase(model, iter)); + assert(serd_model_size(model) == 1); + serd_iter_free(iter); + + // Check that erased statement can not be found + SerdRange* empty = serd_model_range(model, s, p, hi, NULL); + assert(serd_range_empty(empty)); + serd_range_free(empty); + + serd_model_free(model); + return 0; +} + +static int +test_erase_all(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + generate(world, model, n_quads, NULL); + + SerdIter* iter = serd_model_begin(model); + while (!serd_iter_equals(iter, serd_model_end(model))) { + assert(!serd_model_erase(model, iter)); + } + + serd_iter_free(iter); + serd_model_free(model); + return 0; +} + +static int +test_copy(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + generate(world, model, n_quads, NULL); + + SerdModel* copy = serd_model_copy(model); + assert(serd_model_equals(model, copy)); + + serd_model_free(model); + serd_model_free(copy); + return 0; +} + +static int +test_equals(SerdWorld* world, const size_t n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + generate(world, model, n_quads, NULL); + serd_model_add( + model, uri(world, 0), uri(world, 1), uri(world, 2), uri(world, 3)); + + assert(serd_model_equals(NULL, NULL)); + assert(!serd_model_equals(NULL, model)); + assert(!serd_model_equals(model, NULL)); + + SerdModel* empty = serd_model_new(world, SERD_INDEX_SPO); + assert(!serd_model_equals(model, empty)); + + SerdModel* different = serd_model_new(world, SERD_INDEX_SPO); + generate(world, different, n_quads, NULL); + serd_model_add(different, + uri(world, 1), + uri(world, 1), + uri(world, 2), + uri(world, 3)); + + assert(serd_model_size(model) == serd_model_size(different)); + assert(!serd_model_equals(model, different)); + + serd_model_free(model); + serd_model_free(empty); + serd_model_free(different); + return 0; +} + +static int +test_find_past_end(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* o = uri(world, 3); + assert(!serd_model_add(model, s, p, o, 0)); + assert(serd_model_ask(model, s, p, o, 0)); + + const SerdNode* huge = uri(world, 999); + SerdRange* range = serd_model_range(model, huge, huge, huge, 0); + assert(serd_range_empty(range)); + + serd_range_free(range); + serd_model_free(model); + return 0; +} + +static int +test_iter_comparison(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + + serd_world_set_message_sink(world, expected_error, NULL); + assert(!serd_model_add( + model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + + // Add a statement with an active iterator + SerdIter* iter1 = serd_model_begin(model); + SerdIter* iter2 = serd_model_begin(model); + assert(serd_iter_equals(iter1, iter2)); + + serd_iter_next(iter1); + assert(!serd_iter_equals(iter1, iter2)); + + const SerdIter* end = serd_model_end(model); + assert(serd_iter_equals(iter1, end)); + + serd_iter_free(iter2); + serd_iter_free(iter1); + serd_model_free(model); + return 0; +} + +static int +test_triple_index_read(SerdWorld* world, const size_t n_quads) +{ + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = serd_model_new(world, (1 << i)); + generate(world, model, n_quads, 0); + assert(!test_read(world, model, 0, n_quads)); + serd_model_free(model); + } + + return 0; +} + +static int +test_quad_index_read(SerdWorld* world, const size_t n_quads) +{ + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = serd_model_new(world, (1 << i) | SERD_INDEX_GRAPHS); + const SerdNode* graph = uri(world, 42); + generate(world, model, n_quads, graph); + assert(!test_read(world, model, graph, n_quads)); + serd_model_free(model); + } + + return 0; +} + +static int +test_remove_graph(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = + serd_model_new(world, SERD_INDEX_SPO | SERD_INDEX_GRAPHS); + + // Generate a couple of graphs + const SerdNode* graph42 = uri(world, 42); + const SerdNode* graph43 = uri(world, 43); + generate(world, model, 1, graph42); + generate(world, model, 1, graph43); + + // Remove one graph via range + SerdRange* range = serd_model_range(model, NULL, NULL, NULL, graph43); + SerdStatus st = serd_model_erase_range(model, range); + assert(!st); + serd_range_free(range); + + // Erase the first tuple (an element in the default graph) + SerdIter* iter = serd_model_begin(model); + assert(!serd_model_erase(model, iter)); + serd_iter_free(iter); + + // Ensure only the other graph is left + Quad pat = {0, 0, 0, graph42}; + for (iter = serd_model_begin(model); + !serd_iter_equals(iter, serd_model_end(model)); + serd_iter_next(iter)) { + assert(serd_statement_matches( + serd_iter_get(iter), pat[0], pat[1], pat[2], pat[3])); + } + serd_iter_free(iter); + + serd_model_free(model); + return 0; +} + +static int +test_default_graph(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = + serd_model_new(world, SERD_INDEX_SPO | SERD_INDEX_GRAPHS); + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* o = uri(world, 3); + const SerdNode* g1 = uri(world, 101); + const SerdNode* g2 = uri(world, 102); + + // Insert the same statement into two graphs + assert(!serd_model_add(model, s, p, o, g1)); + assert(!serd_model_add(model, s, p, o, g2)); + + // Ensure we only see statement once in the default graph + assert(serd_model_count(model, s, p, o, NULL) == 1); + + serd_model_free(model); + return 0; +} + +static int +test_write_bad_list(SerdWorld* world, const size_t n_quads) +{ + (void)n_quads; + + SerdModel* model = + serd_model_new(world, SERD_INDEX_SPO | SERD_INDEX_GRAPHS); + SerdNodes* nodes = serd_nodes_new(); + const SerdNode* s = manage(world, serd_new_uri("urn:s")); + const SerdNode* p = manage(world, serd_new_uri("urn:p")); + const SerdNode* list1 = manage(world, serd_new_blank("l1")); + const SerdNode* list2 = manage(world, serd_new_blank("l2")); + const SerdNode* nofirst = manage(world, serd_new_blank("nof")); + const SerdNode* norest = manage(world, serd_new_blank("nor")); + const SerdNode* pfirst = manage(world, serd_new_uri(RDF_FIRST)); + const SerdNode* prest = manage(world, serd_new_uri(RDF_REST)); + const SerdNode* val1 = manage(world, serd_new_string("a")); + const SerdNode* val2 = manage(world, serd_new_string("b")); + + // List where second node has no rdf:first + serd_model_add(model, s, p, list1, NULL); + serd_model_add(model, list1, pfirst, val1, NULL); + serd_model_add(model, list1, prest, nofirst, NULL); + + // List where second node has no rdf:rest + serd_model_add(model, s, p, list2, NULL); + serd_model_add(model, list2, pfirst, val1, NULL); + serd_model_add(model, list2, prest, norest, NULL); + serd_model_add(model, norest, pfirst, val2, NULL); + + SerdBuffer buffer = {NULL, 0}; + SerdEnv* env = serd_env_new(NULL); + SerdWriter* writer = serd_writer_new( + world, SERD_TURTLE, 0, env, serd_buffer_sink, &buffer); + + SerdRange* all = serd_model_all(model); + serd_range_serialise(all, serd_writer_get_sink(writer), 0); + serd_range_free(all); + + serd_writer_finish(writer); + const char* str = serd_buffer_sink_finish(&buffer); + const char* expected = "\n" + " (\n" + " \"a\"\n" + " ) , (\n" + " \"a\"\n" + " \"b\"\n" + " ) .\n"; + + assert(!strcmp(str, expected)); + + free(buffer.buf); + serd_writer_free(writer); + serd_model_free(model); + serd_env_free(env); + serd_nodes_free(nodes); + return 0; +} + +int +main(void) +{ + static const size_t n_quads = 300; + + serd_model_free(NULL); // Shouldn't crash + + typedef int (*TestFunc)(SerdWorld*, size_t); + + const TestFunc tests[] = {test_free_null, + test_get_world, + test_get_flags, + test_all_begin, + test_add_null, + test_add_with_iterator, + test_erase_with_iterator, + test_add_erase, + test_erase_all, + test_copy, + test_equals, + test_find_past_end, + test_iter_comparison, + test_triple_index_read, + test_quad_index_read, + test_remove_graph, + test_default_graph, + test_write_bad_list, + NULL}; + + SerdWorld* world = serd_world_new(); + for (const TestFunc* t = tests; *t; ++t) { + serd_world_set_message_sink(world, unexpected_error, NULL); + if ((*t)(world, n_quads)) { + return EXIT_FAILURE; + } + } + + serd_world_free(world); + return 0; +} diff --git a/tests/sink_test.c b/tests/sink_test.c new file mode 100644 index 00000000..b6f0b358 --- /dev/null +++ b/tests/sink_test.c @@ -0,0 +1,127 @@ +/* + Copyright 2019 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#undef NDEBUG + +#include "serd/serd.h" + +#include +#include + +#define NS_EG "http://example.org/" + +typedef struct +{ + SerdStatus return_status; + const SerdNode* last_base; + const SerdNode* last_name; + const SerdNode* last_namespace; + const SerdNode* last_end; + const SerdStatement* last_statement; +} State; + +static SerdStatus +on_base(void* handle, const SerdNode* uri) +{ + State* state = (State*)handle; + + state->last_base = uri; + return state->return_status; +} + +static SerdStatus +on_prefix(void* handle, const SerdNode* name, const SerdNode* uri) +{ + State* state = (State*)handle; + + state->last_name = name; + state->last_namespace = uri; + return state->return_status; +} + +static SerdStatus +on_statement(void* handle, + SerdStatementFlags flags, + const SerdStatement* statement) +{ + State* state = (State*)handle; + + state->last_statement = statement; + return state->return_status; +} + +static SerdStatus +on_end(void* handle, const SerdNode* node) +{ + State* state = (State*)handle; + + state->last_end = node; + return state->return_status; +} + +int +main(void) +{ + SerdNodes* const nodes = serd_nodes_new(); + + const SerdNode* base = serd_nodes_manage(nodes, serd_new_uri(NS_EG)); + const SerdNode* name = serd_nodes_manage(nodes, serd_new_string("eg")); + const SerdNode* uri = serd_nodes_manage(nodes, serd_new_uri(NS_EG "uri")); + const SerdNode* blank = serd_nodes_manage(nodes, serd_new_blank("b1")); + SerdEnv* env = serd_env_new(base); + + SerdStatement* const statement = + serd_statement_new(base, uri, blank, NULL, NULL); + + State state = {SERD_SUCCESS, 0, 0, 0, 0, 0}; + SerdSink* sink = serd_sink_new(&state, env); + + assert(serd_sink_get_env(sink) == env); + + // Call without having any functions set + + assert(!serd_sink_write_base(sink, base)); + assert(!serd_sink_write_prefix(sink, name, uri)); + assert(!serd_sink_write_statement(sink, 0, statement)); + assert(!serd_sink_write(sink, 0, base, uri, blank, NULL)); + assert(!serd_sink_write_end(sink, blank)); + + // Set functions and try again + + serd_sink_set_base_func(sink, on_base); + assert(!serd_sink_write_base(sink, base)); + assert(serd_node_equals(state.last_base, base)); + + serd_sink_set_prefix_func(sink, on_prefix); + assert(!serd_sink_write_prefix(sink, name, uri)); + assert(serd_node_equals(state.last_name, name)); + assert(serd_node_equals(state.last_namespace, uri)); + + serd_sink_set_statement_func(sink, on_statement); + assert(!serd_sink_write_statement(sink, 0, statement)); + assert(serd_statement_equals(state.last_statement, statement)); + + serd_sink_set_end_func(sink, on_end); + assert(!serd_sink_write_end(sink, blank)); + assert(serd_node_equals(state.last_end, blank)); + + serd_sink_free(sink); + serd_statement_free(statement); + serd_env_free(env); + serd_nodes_free(nodes); + + return 0; +} diff --git a/tests/statement_test.c b/tests/statement_test.c new file mode 100644 index 00000000..0780ea6e --- /dev/null +++ b/tests/statement_test.c @@ -0,0 +1,81 @@ +/* + Copyright 2011-2019 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#undef NDEBUG + +#include "serd/serd.h" + +#include +#include + +#define NS_EG "http://example.org/" + +int +main(void) +{ + SerdNodes* const nodes = serd_nodes_new(); + + const SerdNode* const f = serd_nodes_manage(nodes, serd_new_string("file")); + const SerdNode* const s = serd_nodes_manage(nodes, serd_new_uri(NS_EG "s")); + const SerdNode* const p = serd_nodes_manage(nodes, serd_new_uri(NS_EG "p")); + const SerdNode* const o = serd_nodes_manage(nodes, serd_new_uri(NS_EG "o")); + const SerdNode* const g = serd_nodes_manage(nodes, serd_new_uri(NS_EG "g")); + + assert(!serd_statement_copy(NULL)); + + SerdCursor* const cursor = serd_cursor_new(f, 1, 1); + SerdStatement* const statement = serd_statement_new(s, p, o, g, cursor); + assert(serd_statement_equals(statement, statement)); + assert(!serd_statement_equals(statement, NULL)); + assert(!serd_statement_equals(NULL, statement)); + assert(serd_statement_get_subject(statement) == s); + assert(serd_statement_get_predicate(statement) == p); + assert(serd_statement_get_object(statement) == o); + assert(serd_statement_get_graph(statement) == g); + assert(serd_statement_get_cursor(statement) != cursor); + assert(serd_cursor_equals(serd_statement_get_cursor(statement), cursor)); + assert(serd_statement_matches(statement, s, p, o, g)); + assert(serd_statement_matches(statement, NULL, p, o, g)); + assert(serd_statement_matches(statement, s, NULL, o, g)); + assert(serd_statement_matches(statement, s, p, NULL, g)); + assert(serd_statement_matches(statement, s, p, o, NULL)); + assert(!serd_statement_matches(statement, o, NULL, NULL, NULL)); + assert(!serd_statement_matches(statement, NULL, o, NULL, NULL)); + assert(!serd_statement_matches(statement, NULL, NULL, s, NULL)); + assert(!serd_statement_matches(statement, NULL, NULL, NULL, s)); + + SerdStatement* const diff_s = serd_statement_new(o, p, o, g, cursor); + assert(!serd_statement_equals(statement, diff_s)); + serd_statement_free(diff_s); + + SerdStatement* const diff_p = serd_statement_new(s, o, o, g, cursor); + assert(!serd_statement_equals(statement, diff_p)); + serd_statement_free(diff_p); + + SerdStatement* const diff_o = serd_statement_new(s, p, s, g, cursor); + assert(!serd_statement_equals(statement, diff_o)); + serd_statement_free(diff_o); + + SerdStatement* const diff_g = serd_statement_new(s, p, o, s, cursor); + assert(!serd_statement_equals(statement, diff_g)); + serd_statement_free(diff_g); + + serd_statement_free(statement); + serd_cursor_free(cursor); + serd_nodes_free(nodes); + + return 0; +} diff --git a/wscript b/wscript index e8abe917..f047e25f 100644 --- a/wscript +++ b/wscript @@ -22,8 +22,9 @@ out = 'build' # Build directory def options(ctx): ctx.load('compiler_c') + opt = ctx.configuration_options() ctx.add_flags( - ctx.configuration_options(), + opt, {'no-utils': 'do not build command line utilities', 'stack-check': 'include runtime stack sanity checks', 'static': 'build static library', @@ -77,9 +78,13 @@ lib_source = ['src/base64.c', 'src/byte_source.c', 'src/cursor.c', 'src/env.c', + 'src/inserter.c', + 'src/iter.c', + 'src/model.c', 'src/n3.c', 'src/node.c', 'src/nodes.c', + 'src/range.c', 'src/reader.c', 'src/sink.c', 'src/statement.c', @@ -89,6 +94,7 @@ lib_source = ['src/base64.c', 'src/uri.c', 'src/world.c', 'src/writer.c', + 'src/zix/btree.c', 'src/zix/digest.c', 'src/zix/hash.c'] @@ -150,10 +156,13 @@ def build(bld): for prog in [('serdi_static', 'src/serdi.c'), ('base64_test', 'tests/base64_test.c'), ('cursor_test', 'tests/cursor_test.c'), + ('statement_test', 'tests/statement_test.c'), + ('sink_test', 'tests/sink_test.c'), ('serd_test', 'tests/serd_test.c'), ('read_chunk_test', 'tests/read_chunk_test.c'), ('nodes_test', 'tests/nodes_test.c'), - ('overflow_test', 'tests/overflow_test.c')]: + ('overflow_test', 'tests/overflow_test.c'), + ('model_test', 'tests/model_test.c')]: bld(features = 'c cprogram', source = prog[1], use = 'libserd_profiled', @@ -341,6 +350,22 @@ def _load_rdf(filename): return model, instances +def _file_lines_equal(patha, pathb, subst_from='', subst_to=''): + import io + + for path in (patha, pathb): + if not os.access(path, os.F_OK): + Logs.pprint('RED', 'error: missing file %s' % path) + return False + + la = sorted(set(io.open(patha, encoding='utf-8').readlines())) + lb = sorted(set(io.open(pathb, encoding='utf-8').readlines())) + if la != lb: + autowaf.show_diff(la, lb, patha, pathb) + return False + + return True + def test_suite(ctx, base_uri, testdir, report, isyntax, options=[]): import itertools @@ -397,9 +422,19 @@ def test_suite(ctx, base_uri, testdir, report, isyntax, options=[]): if report is not None: report.write(earl_assertion(test, result, asserter)) - # Run lax test - check([command[0]] + ['-l'] + command[1:], - expected=None, name=action + ' lax') + if expected_return != 0: + # Run lax parsing test for negative test (may still fail) + check([command[0]] + ['-l'] + command[1:], + expected=None, name=action + ' lax') + else: + # Run model test for positive test (must succeed) + model_out_path = action + '.model.out' + check([command[0]] + ['-m'] + command[1:], stdout=model_out_path, + name=action + ' model') + + if result and ((mf + 'result') in model[test]): + check(lambda: _file_lines_equal(check_path, model_out_path), + name=action + ' model check') ns_rdftest = 'http://www.w3.org/ns/rdftest#' for test_class, instances in instances.items(): @@ -425,6 +460,9 @@ def test(tst): with tst.group('Unit') as check: check(['./base64_test']) check(['./cursor_test']) + check(['./statement_test']) + check(['./sink_test']) + check(['./model_test']) check(['./nodes_test']) check(['./overflow_test']) check(['./serd_test']) -- cgit v1.2.1