diff options
author | David Robillard <d@drobilla.net> | 2018-05-12 13:28:47 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2021-03-08 23:34:52 -0500 |
commit | 677d85c7f3df3563e990f41964f6ae7d560567a0 (patch) | |
tree | 9b5113cb7c2b2cc7ae8a07ab5a21e5a55d579559 | |
parent | 15485f61df8cbd922907db39fcc864abb002eea3 (diff) | |
download | serd-677d85c7f3df3563e990f41964f6ae7d560567a0.tar.gz serd-677d85c7f3df3563e990f41964f6ae7d560567a0.tar.bz2 serd-677d85c7f3df3563e990f41964f6ae7d560567a0.zip |
Add model
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | doc/serdi.1 | 12 | ||||
-rw-r--r-- | include/serd/serd.h | 420 | ||||
-rw-r--r-- | meson.build | 6 | ||||
-rw-r--r-- | src/inserter.c | 128 | ||||
-rw-r--r-- | src/iter.c | 211 | ||||
-rw-r--r-- | src/iter.h | 98 | ||||
-rw-r--r-- | src/model.c | 658 | ||||
-rw-r--r-- | src/model.h | 41 | ||||
-rw-r--r-- | src/node.h | 15 | ||||
-rw-r--r-- | src/range.c | 326 | ||||
-rw-r--r-- | src/range.h | 35 | ||||
-rw-r--r-- | src/serdi.c | 41 | ||||
-rw-r--r-- | src/statement.c | 78 | ||||
-rw-r--r-- | src/statement.h | 7 | ||||
-rw-r--r-- | src/string.c | 2 | ||||
-rw-r--r-- | test/.clang-tidy | 1 | ||||
-rw-r--r-- | test/meson.build | 3 | ||||
-rwxr-xr-x | test/run_test_suite.py | 29 | ||||
-rw-r--r-- | test/test_free_null.c | 4 | ||||
-rw-r--r-- | test/test_model.c | 900 | ||||
-rw-r--r-- | test/test_sink.c | 151 | ||||
-rw-r--r-- | test/test_statement.c | 91 |
23 files changed, 3253 insertions, 5 deletions
@@ -3,6 +3,7 @@ serd (1.0.1) unstable; * Add SerdBuffer for mutable buffers to keep SerdChunk const-correct * Add SerdWorld for shared library state * Add extensible logging API + * Add model for storing statements in memory * Add option for writing terse output without newlines * Add support for parsing variables * Add support for writing terse collections diff --git a/doc/serdi.1 b/doc/serdi.1 index b89bfba7..ce0b1ed4 100644 --- a/doc/serdi.1 +++ b/doc/serdi.1 @@ -64,7 +64,11 @@ This is useful when reading from a pipe since output will be generated immediate With this option serdi uses one page less memory, but will likely be significantly slower. .Pp .It Fl f -Keep full URIs in input (don't qualify with namespace prefixes or make URIs relative). +Fast and loose mode. +This disables shortening URIs into prefixed names or relative URI references. +If the model is enabled, then this writes the model quickly in sorted order. +Note that doing so with TriG or Turtle may make the output ugly, +since blank nodes will not be inlined. .Pp .It Fl h Print the command line options. @@ -92,6 +96,12 @@ Lax (non-strict) parsing. If this is enabled, recoverable syntax errors will print a warning, but parsing will proceed starting at the next statement if possible. Note that data may be lost when using this option. .Pp +.It Fl m +Build a model in memory. +This loads all of the input into memory before writing the output. +This will reorder statements and eliminate duplicates, at the cost of performance and memory consumption. +When writing TriG or Turtle, this may enable better pretty-printing with more inline descriptions. +.Pp .It Fl o Ar syntax Write output as .Ar syntax . diff --git a/include/serd/serd.h b/include/serd/serd.h index 04d8708b..14e321b5 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -102,6 +102,15 @@ typedef struct SerdCursorImpl SerdCursor; /// Lexical environment for relative URIs or CURIEs (base URI and namespaces) typedef struct SerdEnvImpl SerdEnv; +/// An indexed set of statements +typedef struct SerdModelImpl SerdModel; + +/// An iterator that points to a statement in a model +typedef struct SerdIterImpl SerdIter; + +/// A range of statements in a model +typedef struct SerdRangeImpl SerdRange; + /// Streaming parser that reads a text stream and writes to a sink typedef struct SerdReaderImpl SerdReader; @@ -118,6 +127,7 @@ typedef enum { SERD_ERR_UNKNOWN, ///< Unknown error SERD_ERR_BAD_SYNTAX, ///< Invalid syntax SERD_ERR_BAD_ARG, ///< Invalid argument + SERD_ERR_BAD_ITER, ///< Use of invalidated iterator SERD_ERR_NOT_FOUND, ///< Not found SERD_ERR_ID_CLASH, ///< Encountered clashing blank node IDs SERD_ERR_BAD_CURIE, ///< Invalid CURIE (e.g. prefix does not exist) @@ -153,6 +163,14 @@ typedef enum { /// Bitwise OR of SerdStatementFlag values typedef uint32_t SerdStatementFlags; +/// Flags that control the style of a model serialisation +typedef enum { + SERD_NO_INLINE_OBJECTS = 1u << 0u ///< Disable object inlining +} SerdSerialisationFlag; + +/// Bitwise OR of SerdStatementFlag values +typedef uint32_t SerdSerialisationFlags; + /** Type of a node. @@ -229,7 +247,22 @@ typedef enum { SERD_GRAPH = 3 ///< Graph ("context") } SerdField; -/// A syntactic RDF node +/// Flags that control model storage and indexing +typedef enum { + SERD_INDEX_SPO = 1u << 0u, ///< Subject, Predicate, Object + SERD_INDEX_SOP = 1u << 1u, ///< Subject, Object, Predicate + SERD_INDEX_OPS = 1u << 2u, ///< Object, Predicate, Subject + SERD_INDEX_OSP = 1u << 3u, ///< Object, Subject, Predicate + SERD_INDEX_PSO = 1u << 4u, ///< Predicate, Subject, Object + SERD_INDEX_POS = 1u << 5u, ///< Predicate, Object, Subject + SERD_INDEX_GRAPHS = 1u << 6u, ///< Support multiple graphs in model + SERD_STORE_CURSORS = 1u << 7u ///< Store original cursor of statements +} SerdModelFlag; + +/// Bitwise OR of SerdModelFlag values +typedef uint32_t SerdModelFlags; + +/// An RDF node typedef struct SerdNodeImpl SerdNode; /// An unterminated immutable slice of a string @@ -1663,10 +1696,261 @@ serd_nodes_deref(SerdNodes* SERD_NONNULL nodes, /** @} + @defgroup serd_model Model + @{ +*/ + +/** + Create a new model + + @param world The world in which to make this model. + + @param flags Model options, including enabled indices, for example `SERD_SPO + | SERD_OPS`. Be sure to enable an index where the most significant node(s) + are not variables in your queries. For example, to make (? P O) queries, + enable either SERD_OPS or SERD_POS. +*/ +SERD_API +SerdModel* SERD_ALLOCATED +serd_model_new(SerdWorld* SERD_NONNULL world, SerdModelFlags flags); + +/// Return a deep copy of `model` +SERD_API +SerdModel* SERD_ALLOCATED +serd_model_copy(const SerdModel* SERD_NONNULL model); + +/// Return true iff `a` is equal to `b`, ignoring statement cursor metadata +SERD_API +bool +serd_model_equals(const SerdModel* SERD_NULLABLE a, + const SerdModel* SERD_NULLABLE b); + +/// Close and free `model` +SERD_API +void +serd_model_free(SerdModel* SERD_NULLABLE model); + +/// Get the world associated with `model` +SERD_PURE_API +SerdWorld* SERD_NONNULL +serd_model_world(SerdModel* SERD_NONNULL model); + +/// Get the flags enabled on `model` +SERD_PURE_API +SerdModelFlags +serd_model_flags(const SerdModel* SERD_NONNULL model); + +/// Return the number of statements stored in `model` +SERD_PURE_API +size_t +serd_model_size(const SerdModel* SERD_NONNULL model); + +/// Return true iff there are no statements stored in `model` +SERD_PURE_API +bool +serd_model_empty(const SerdModel* SERD_NONNULL model); + +/// Return an iterator to the start of `model` +SERD_API +SerdIter* SERD_ALLOCATED +serd_model_begin(const SerdModel* SERD_NONNULL model); + +/// Return an iterator to the end of `model` +SERD_PURE_API +const SerdIter* SERD_NONNULL +serd_model_end(const SerdModel* SERD_NONNULL model); + +/// Return a range of all statements in `model` +SERD_API +SerdRange* SERD_ALLOCATED +serd_model_all(const SerdModel* SERD_NONNULL model); + +/** + Search for statements by a quad pattern + + @return An iterator to the first match, or NULL if no matches found. +*/ +SERD_API +SerdIter* SERD_ALLOCATED +serd_model_find(const SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NULLABLE s, + const SerdNode* SERD_NULLABLE p, + const SerdNode* SERD_NULLABLE o, + const SerdNode* SERD_NULLABLE g); + +/** + Search for statements by a quad pattern + + @return A range containing all matching statements. +*/ +SERD_API +SerdRange* SERD_ALLOCATED +serd_model_range(const SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NULLABLE s, + const SerdNode* SERD_NULLABLE p, + const SerdNode* SERD_NULLABLE o, + const SerdNode* SERD_NULLABLE g); + +/** + Search for a single node that matches a pattern + + Exactly one of `s`, `p`, `o` must be NULL. + This function is mainly useful for predicates that only have one value. + @return The first matching node, or NULL if no matches are found. +*/ +SERD_API +const SerdNode* SERD_NULLABLE +serd_model_get(const SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NULLABLE s, + const SerdNode* SERD_NULLABLE p, + const SerdNode* SERD_NULLABLE o, + const SerdNode* SERD_NULLABLE g); + +/** + Search for a single statement that matches a pattern + + This function is mainly useful for predicates that only have one value. + + @return The first matching statement, or NULL if none are found. +*/ +SERD_API +const SerdStatement* SERD_NULLABLE +serd_model_get_statement(const SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NULLABLE s, + const SerdNode* SERD_NULLABLE p, + const SerdNode* SERD_NULLABLE o, + const SerdNode* SERD_NULLABLE g); + +/// Return true iff a statement exists +SERD_API +bool +serd_model_ask(const SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NULLABLE s, + const SerdNode* SERD_NULLABLE p, + const SerdNode* SERD_NULLABLE o, + const SerdNode* SERD_NULLABLE g); + +/// Return the number of matching statements +SERD_API +size_t +serd_model_count(const SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NULLABLE s, + const SerdNode* SERD_NULLABLE p, + const SerdNode* SERD_NULLABLE o, + const SerdNode* SERD_NULLABLE g); + +/** + Add a statement to a model from nodes + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_add(SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NULLABLE s, + const SerdNode* SERD_NULLABLE p, + const SerdNode* SERD_NULLABLE o, + const SerdNode* SERD_NULLABLE g); + +/** + Add a statement to a model + + This function fails if there are any active iterators on `model`. + If statement is null, then SERD_FAILURE is returned. +*/ +SERD_API +SerdStatus +serd_model_insert(SerdModel* SERD_NONNULL model, + const SerdStatement* SERD_NULLABLE statement); + +/** + Add a range of statements to a model + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_add_range(SerdModel* SERD_NONNULL model, + SerdRange* SERD_NONNULL range); + +/** + Remove a quad from a model via an iterator + + Calling this function invalidates all iterators on `model` except `iter`. + + @param model The model which `iter` points to. + @param iter Iterator to the element to erase, which is incremented to the + next value on return. +*/ +SERD_API +SerdStatus +serd_model_erase(SerdModel* SERD_NONNULL model, SerdIter* SERD_NONNULL iter); + +/** + Remove a range from a model + + Calling this function invalidates all iterators on `model` except `iter`. + + @param model The model which `range` points to. + @param range Range to erase, which will be empty on return; +*/ +SERD_API +SerdStatus +serd_model_erase_range(SerdModel* SERD_NONNULL model, + SerdRange* SERD_NONNULL range); + +/** + @} + @defgroup serd_inserter Inserter + @{ +*/ + +/// Create an inserter for writing statements to a model +SERD_API +SerdSink* SERD_ALLOCATED +serd_inserter_new(SerdModel* SERD_NONNULL model, + SerdEnv* SERD_NONNULL env, + const SerdNode* SERD_NULLABLE default_graph); + +/** + @} @defgroup serd_statement Statement @{ */ +/** + Create a new statement + + Note that, to minimise model overhead, statements do not own their nodes, so + they must have a longer lifetime than the statement for it to be valid. For + statements in models, this is the lifetime of the model. For user-created + statements, the simplest way to handle this is to use `SerdNodes`. + + @param s The subject + @param p The predicate ("key") + @param o The object ("value") + @param g The graph ("context") + @param cursor Optional cursor at the origin of this statement + @return A new statement that must be freed with serd_statement_free() +*/ +SERD_API +SerdStatement* SERD_ALLOCATED +serd_statement_new(const SerdNode* SERD_NONNULL s, + const SerdNode* SERD_NONNULL p, + const SerdNode* SERD_NONNULL o, + const SerdNode* SERD_NULLABLE g, + const SerdCursor* SERD_NULLABLE cursor); + +/// Return a copy of `statement` +SERD_API +SerdStatement* SERD_ALLOCATED +serd_statement_copy(const SerdStatement* SERD_NULLABLE statement); + +/// Free `statement` +SERD_API +void +serd_statement_free(SerdStatement* SERD_NULLABLE statement); + /// Return the given node in `statement` SERD_PURE_API const SerdNode* SERD_NULLABLE @@ -1699,6 +1983,140 @@ const SerdCursor* SERD_NULLABLE serd_statement_cursor(const SerdStatement* SERD_NONNULL statement); /** + Return true iff `a` is equal to `b`, ignoring statement cursor metadata + + Only returns true if nodes are equivalent, does not perform wildcard + matching. +*/ +SERD_PURE_API +bool +serd_statement_equals(const SerdStatement* SERD_NULLABLE a, + const SerdStatement* SERD_NULLABLE b); + +/** + Return true iff `statement` matches the given pattern + + Nodes match if they are equivalent, or if one of them is NULL. The + statement matches if every node matches. +*/ +SERD_PURE_API +bool +serd_statement_matches(const SerdStatement* SERD_NONNULL statement, + const SerdNode* SERD_NULLABLE subject, + const SerdNode* SERD_NULLABLE predicate, + const SerdNode* SERD_NULLABLE object, + const SerdNode* SERD_NULLABLE graph); + +/** + @} + @defgroup serd_iterator Iterator + @{ +*/ + +/// Return a new copy of `iter` +SERD_API +SerdIter* SERD_ALLOCATED +serd_iter_copy(const SerdIter* SERD_NULLABLE iter); + +/// Return the statement pointed to by `iter` +SERD_API +const SerdStatement* SERD_NULLABLE +serd_iter_get(const SerdIter* SERD_NONNULL iter); + +/** + Increment `iter` to point to the next statement + + @return True iff `iter` has reached the end. +*/ +SERD_API +bool +serd_iter_next(SerdIter* SERD_NONNULL iter); + +/// Return true iff `lhs` is equal to `rhs` +SERD_PURE_API +bool +serd_iter_equals(const SerdIter* SERD_NULLABLE lhs, + const SerdIter* SERD_NULLABLE rhs); + +/// Free `iter` +SERD_API +void +serd_iter_free(SerdIter* SERD_NULLABLE iter); + +/** + @} + @defgroup serd_range Range + @{ +*/ + +/// Return a new copy of `range` +SERD_API +SerdRange* SERD_ALLOCATED +serd_range_copy(const SerdRange* SERD_NULLABLE range); + +/// Free `range` +SERD_API +void +serd_range_free(SerdRange* SERD_NULLABLE range); + +/// Return the first statement in `range`, or NULL if `range` is empty +SERD_API +const SerdStatement* SERD_NULLABLE +serd_range_front(const SerdRange* SERD_NONNULL range); + +/// Return true iff `lhs` is equal to `rhs` +SERD_PURE_API +bool +serd_range_equals(const SerdRange* SERD_NULLABLE lhs, + const SerdRange* SERD_NULLABLE rhs); + +/// Increment the start of `range` to point to the next statement +SERD_API +bool +serd_range_next(SerdRange* SERD_NONNULL range); + +/// Return true iff there are no statements in `range` +SERD_PURE_API +bool +serd_range_empty(const SerdRange* SERD_NULLABLE range); + +/// Return an iterator to the start of `range` +SERD_PURE_API +const SerdIter* SERD_NULLABLE +serd_range_cbegin(const SerdRange* SERD_NONNULL range); + +/// Return an iterator to the end of `range` +SERD_PURE_API +const SerdIter* SERD_NULLABLE +serd_range_cend(const SerdRange* SERD_NONNULL range); + +/// Return an iterator to the start of `range` +SERD_PURE_API +SerdIter* SERD_NULLABLE +serd_range_begin(SerdRange* SERD_NONNULL range); + +/// Return an iterator to the end of `range` +SERD_PURE_API +SerdIter* SERD_NULLABLE +serd_range_end(SerdRange* SERD_NONNULL range); + +/** + Write `range` to `sink` + + The serialisation style can be controlled with `flags`. The default is to + write statements in an order suited for pretty-printing with Turtle or TriG + with as many objects written inline as possible. If + `SERD_NO_INLINE_OBJECTS` is given, a simple sorted stream is written + instead, which is significantly faster since no searching is required, but + can result in ugly output for Turtle or Trig. +*/ +SERD_API +SerdStatus +serd_range_serialise(const SerdRange* SERD_NONNULL range, + const SerdSink* SERD_NONNULL sink, + SerdSerialisationFlags flags); + +/** @} @defgroup serd_cursor Cursor @{ diff --git a/meson.build b/meson.build index fba0cfcb..a8a45785 100644 --- a/meson.build +++ b/meson.build @@ -46,6 +46,7 @@ if get_option('strict') '-Wno-format-nonliteral', '-Wno-inline', '-Wno-padded', + '-Wno-strict-overflow', '-Wno-switch-default', '-Wno-unsuffixed-float-constants', '-Wno-unused-const-variable', @@ -90,10 +91,14 @@ sources = [ 'src/byte_source.c', 'src/cursor.c', 'src/env.c', + 'src/inserter.c', + 'src/iter.c', + 'src/model.c', 'src/n3.c', 'src/node.c', 'src/node_syntax.c', 'src/nodes.c', + 'src/range.c', 'src/reader.c', 'src/sink.c', 'src/statement.c', @@ -103,6 +108,7 @@ sources = [ 'src/uri.c', 'src/world.c', 'src/writer.c', + 'src/zix/btree.c', 'src/zix/digest.c', 'src/zix/hash.c', ] diff --git a/src/inserter.c b/src/inserter.c new file mode 100644 index 00000000..4c19772c --- /dev/null +++ b/src/inserter.c @@ -0,0 +1,128 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "model.h" +#include "statement.h" +#include "world.h" + +#include "serd/serd.h" + +#include <stdlib.h> + +typedef struct { + SerdEnv* env; + SerdModel* model; + const SerdNode* default_graph; +} SerdInserterData; + +static bool +can_insert(SerdWorld* const world, const SerdNode* const node) +{ + if (node) { + switch (serd_node_type(node)) { + case SERD_LITERAL: + return can_insert(world, serd_node_datatype(node)); + + case SERD_URI: + if (!serd_uri_string_has_scheme(serd_node_string(node))) { + SERD_LOG_ERRORF(world, + SERD_ERR_BAD_ARG, + "attempt to insert relative URI <%s> into model\n", + serd_node_string(node)); + return false; + } + break; + + case SERD_CURIE: + SERD_LOG_ERRORF(world, + SERD_ERR_BAD_ARG, + "attempt to insert prefixed name \"%s\" into model\n", + serd_node_string(node)); + return false; + + case SERD_BLANK: + case SERD_VARIABLE: + break; + } + } + + return true; +} + +static SerdStatus +serd_inserter_on_statement(SerdInserterData* data, + const SerdStatementFlags flags, + const SerdStatement* statement) +{ + (void)flags; + + SerdWorld* const world = data->model->world; + + // Check that every node is expanded so it is context-free + for (unsigned i = 0; i < 4; ++i) { + if (!can_insert(world, serd_statement_node(statement, (SerdField)i))) { + return SERD_ERR_BAD_ARG; + } + } + + const SerdNode* const s = + serd_nodes_intern(world->nodes, serd_statement_subject(statement)); + + const SerdNode* const p = + serd_nodes_intern(world->nodes, serd_statement_predicate(statement)); + + const SerdNode* const o = + serd_nodes_intern(world->nodes, serd_statement_object(statement)); + + const SerdNode* const g = + serd_statement_graph(statement) + ? serd_nodes_intern(world->nodes, serd_statement_graph(statement)) + : data->default_graph; + + const SerdCursor* const cur = + (data->model->flags & SERD_STORE_CURSORS) ? statement->cursor : NULL; + + const SerdStatus st = serd_model_add_internal(data->model, cur, s, p, o, g); + + return st > SERD_FAILURE ? st : SERD_SUCCESS; +} + +static SerdStatus +serd_inserter_on_event(SerdInserterData* data, const SerdEvent* event) +{ + if (event->type == SERD_STATEMENT) { + return serd_inserter_on_statement( + data, event->statement.flags, event->statement.statement); + } + + return SERD_SUCCESS; +} + +SerdSink* +serd_inserter_new(SerdModel* model, SerdEnv* env, const SerdNode* default_graph) +{ + SerdInserterData* const data = + (SerdInserterData*)calloc(1, sizeof(SerdInserterData)); + + data->env = env; + data->model = model; + data->default_graph = serd_nodes_intern(model->world->nodes, default_graph); + + SerdSink* const sink = + serd_sink_new(data, (SerdEventFunc)serd_inserter_on_event, free); + + return sink; +} diff --git a/src/iter.c b/src/iter.c new file mode 100644 index 00000000..62c3ef4f --- /dev/null +++ b/src/iter.c @@ -0,0 +1,211 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "iter.h" + +#include "model.h" +#include "node.h" +#include "world.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include <assert.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +static bool +serd_iter_pattern_matches(const SerdIter* iter, const SerdQuad pat) +{ + const SerdStatement* key = (const SerdStatement*)zix_btree_get(iter->cur); + for (int i = 0; i < iter->n_prefix; ++i) { + const int idx = orderings[iter->order][i]; + if (!serd_node_pattern_match(key->nodes[idx], pat[idx])) { + return false; + } + } + + return true; +} + +/** + Seek forward as necessary until `iter` points at a matching statement. + + @return true iff iterator reached end of valid range. +*/ +static bool +serd_iter_seek_match(SerdIter* iter, const SerdQuad pat) +{ + for (; !zix_btree_iter_is_end(iter->cur); + zix_btree_iter_increment(iter->cur)) { + const SerdStatement* s = (const SerdStatement*)zix_btree_get(iter->cur); + if (serd_statement_matches_quad(s, pat)) { + return false; // Found match + } + + if (iter->mode == FILTER_RANGE && !serd_iter_pattern_matches(iter, pat)) { + zix_btree_iter_free(iter->cur); + iter->cur = NULL; + return true; // Reached end of range + } + } + + assert(zix_btree_iter_is_end(iter->cur)); + return true; // Reached end of index +} + +static bool +check_version(const SerdIter* const iter) +{ + if (!iter || iter->version != iter->model->version) { + SERD_LOG_ERROR(iter->model->world, + SERD_ERR_BAD_ITER, + "attempt to use invalidated iterator\n"); + return false; + } + return true; +} + +SerdIter* +serd_iter_new(const SerdModel* model, + ZixBTreeIter* cur, + const SerdQuad pat, + SerdOrder order, + SearchMode mode, + int n_prefix) +{ + SerdIter* iter = (SerdIter*)calloc(1, sizeof(SerdIter)); + iter->model = model; + iter->cur = cur; + iter->order = order; + iter->mode = mode; + iter->n_prefix = n_prefix; + iter->version = model->version; + + switch (iter->mode) { + case ALL: + case RANGE: + assert(zix_btree_iter_is_end(iter->cur) || + serd_statement_matches_quad( + ((const SerdStatement*)zix_btree_get(iter->cur)), pat)); + break; + case FILTER_RANGE: + case FILTER_ALL: + serd_iter_seek_match(iter, pat); + break; + } + + // Replace (possibly temporary) nodes in pattern with nodes from the model + if (!zix_btree_iter_is_end(iter->cur)) { + const SerdStatement* s = (const SerdStatement*)zix_btree_get(iter->cur); + for (int i = 0; i < TUP_LEN; ++i) { + if (pat[i]) { + iter->pat[i] = s->nodes[i]; + } + } + } + + return iter; +} + +SerdIter* +serd_iter_copy(const SerdIter* iter) +{ + if (!iter) { + return NULL; + } + + SerdIter* copy = (SerdIter*)malloc(sizeof(SerdIter)); + memcpy(copy, iter, sizeof(SerdIter)); + copy->cur = zix_btree_iter_copy(iter->cur); + return copy; +} + +const SerdStatement* +serd_iter_get(const SerdIter* iter) +{ + return ((check_version(iter) && !zix_btree_iter_is_end(iter->cur)) + ? (const SerdStatement*)zix_btree_get(iter->cur) + : NULL); +} + +bool +serd_iter_scan_next(SerdIter* iter) +{ + if (zix_btree_iter_is_end(iter->cur)) { + return true; + } + + bool is_end = false; + switch (iter->mode) { + case ALL: + // At the end if the cursor is (assigned above) + break; + case RANGE: + // At the end if the MSNs no longer match + if (!serd_iter_pattern_matches(iter, iter->pat)) { + zix_btree_iter_free(iter->cur); + iter->cur = NULL; + is_end = true; + } + break; + case FILTER_RANGE: + case FILTER_ALL: + // Seek forward to next match + is_end = serd_iter_seek_match(iter, iter->pat); + break; + } + + return is_end; +} + +bool +serd_iter_next(SerdIter* iter) +{ + if (zix_btree_iter_is_end(iter->cur) || !check_version(iter)) { + return true; + } + + zix_btree_iter_increment(iter->cur); + return serd_iter_scan_next(iter); +} + +bool +serd_iter_equals(const SerdIter* lhs, const SerdIter* rhs) +{ + if (!lhs && !rhs) { + return true; + } + + return (lhs && rhs && lhs->model == rhs->model && + zix_btree_iter_equals(lhs->cur, rhs->cur) && + serd_node_pattern_match(lhs->pat[0], rhs->pat[0]) && + serd_node_pattern_match(lhs->pat[1], rhs->pat[1]) && + serd_node_pattern_match(lhs->pat[2], rhs->pat[2]) && + serd_node_pattern_match(lhs->pat[3], rhs->pat[3]) && + lhs->order == rhs->order && lhs->mode == rhs->mode && + lhs->n_prefix == rhs->n_prefix); +} + +void +serd_iter_free(SerdIter* iter) +{ + if (iter) { + zix_btree_iter_free(iter->cur); + free(iter); + } +} diff --git a/src/iter.h b/src/iter.h new file mode 100644 index 00000000..0b0d7e5b --- /dev/null +++ b/src/iter.h @@ -0,0 +1,98 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_ITER_H +#define SERD_ITER_H + +#include "statement.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include <stdbool.h> +#include <stdint.h> + +/** Triple ordering */ +typedef enum { + SPO, ///< Subject, Predicate, Object + SOP, ///< Subject, Object, Predicate + OPS, ///< Object, Predicate, Subject + OSP, ///< Object, Subject, Predicate + PSO, ///< Predicate, Subject, Object + POS, ///< Predicate, Object, Subject + GSPO, ///< Graph, Subject, Predicate, Object + GSOP, ///< Graph, Subject, Object, Predicate + GOPS, ///< Graph, Object, Predicate, Subject + GOSP, ///< Graph, Object, Subject, Predicate + GPSO, ///< Graph, Predicate, Subject, Object + GPOS ///< Graph, Predicate, Object, Subject +} SerdOrder; + +/** Mode for searching or iteration */ +typedef enum { + ALL, ///< Iterate over entire store + RANGE, ///< Iterate over range with equal prefix + FILTER_RANGE, ///< Iterate over range with equal prefix, filtering + FILTER_ALL ///< Iterate to end of store, filtering +} SearchMode; + +struct SerdIterImpl { + const SerdModel* model; ///< Model being iterated over + uint64_t version; ///< Model version when iterator was created + ZixBTreeIter* cur; ///< Current DB cursor + SerdQuad pat; ///< Pattern (in ordering order) + SerdOrder order; ///< Store order (which index) + SearchMode mode; ///< Iteration mode + int n_prefix; ///< Prefix for RANGE and FILTER_RANGE +}; + +#define NUM_ORDERS 12 +#define TUP_LEN 4 + +/** + Quads of indices for each order, from most to least significant + (array indexed by SordOrder) +*/ +static const int orderings[NUM_ORDERS][TUP_LEN] = { + {0, 1, 2, 3}, + {0, 2, 1, 3}, // SPO, SOP + {2, 1, 0, 3}, + {2, 0, 1, 3}, // OPS, OSP + {1, 0, 2, 3}, + {1, 2, 0, 3}, // PSO, POS + {3, 0, 1, 2}, + {3, 0, 2, 1}, // GSPO, GSOP + {3, 2, 1, 0}, + {3, 2, 0, 1}, // GOPS, GOSP + {3, 1, 0, 2}, + {3, 1, 2, 0} // GPSO, GPOS +}; + +SerdIter* +serd_iter_new(const SerdModel* model, + ZixBTreeIter* cur, + const SerdQuad pat, + SerdOrder order, + SearchMode mode, + int n_prefix); + +bool +serd_iter_scan_next(SerdIter* iter); + +bool +serd_quad_match(const SerdQuad x, const SerdQuad y); + +#endif // SERD_ITER_H diff --git a/src/model.c b/src/model.c new file mode 100644 index 00000000..f3e56971 --- /dev/null +++ b/src/model.c @@ -0,0 +1,658 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "model.h" + +#include "cursor.h" +#include "iter.h" +#include "node.h" +#include "range.h" +#include "statement.h" +#include "world.h" + +#include "zix/btree.h" +#include "zix/common.h" + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> + +#define DEFAULT_ORDER SPO +#define DEFAULT_GRAPH_ORDER GSPO + +/** + Compare quads lexicographically, ignoring graph. + + NULL IDs (equal to 0) are treated as wildcards, always less than every + other possible ID, except itself. +*/ +static int +serd_triple_compare(const void* x, const void* y, void* user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (int i = 0; i < TUP_LEN; ++i) { + const int o = ordering[i]; + if (o != SERD_GRAPH) { + const int c = serd_node_wildcard_compare(s->nodes[o], t->nodes[o]); + if (c) { + return c; + } + } + } + + return 0; +} + +/** + Compare quads lexicographically, with exact (non-wildcard) graph matching. +*/ +static int +serd_quad_compare(const void* x, const void* y, void* user_data) +{ + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + // Compare graph without wildcard matching + const int cmp = serd_node_compare(s->nodes[SERD_GRAPH], t->nodes[SERD_GRAPH]); + + return cmp ? cmp : serd_triple_compare(x, y, user_data); +} + +/** + Return true iff `serd` has an index for `order`. + If `graphs` is true, `order` will be modified to be the + corresponding order with a G prepended (so G will be the MSN). +*/ +static inline bool +serd_model_has_index(const SerdModel* model, + SerdOrder* order, + int* n_prefix, + bool graphs) +{ + if (graphs) { + *order = (SerdOrder)(*order + GSPO); + *n_prefix += 1; + } + + return model->indices[*order]; +} + +/** + Return the best available index for a pattern. + @param pat Pattern in standard (S P O G) order + @param mode Set to the (best) iteration mode for iterating over results + @param n_prefix Set to the length of the range prefix + (for `mode` == RANGE and `mode` == FILTER_RANGE) +*/ +static SerdOrder +serd_model_best_index(const SerdModel* model, + const SerdQuad pat, + SearchMode* mode, + int* n_prefix) +{ + const bool graph_search = (pat[SERD_GRAPH] != 0); + + const unsigned sig = + ((pat[0] ? 1u : 0u) * 0x100 + (pat[1] ? 1u : 0u) * 0x010 + + (pat[2] ? 1u : 0u) * 0x001); + + SerdOrder good[2] = {(SerdOrder)-1, (SerdOrder)-1}; + +#define PAT_CASE(sig, m, g0, g1, np) \ + case sig: \ + *mode = m; \ + good[0] = g0; \ + good[1] = g1; \ + *n_prefix = np; \ + break + + // Good orderings that don't require filtering + *mode = RANGE; + *n_prefix = 0; + switch (sig) { + case 0x000: + assert(graph_search); + *mode = RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + case 0x111: + *mode = RANGE; + *n_prefix = graph_search ? 4 : 3; + return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER; + + PAT_CASE(0x001, RANGE, OPS, OSP, 1); + PAT_CASE(0x010, RANGE, POS, PSO, 1); + PAT_CASE(0x011, RANGE, OPS, POS, 2); + PAT_CASE(0x100, RANGE, SPO, SOP, 1); + PAT_CASE(0x101, RANGE, SOP, OSP, 2); + PAT_CASE(0x110, RANGE, SPO, PSO, 2); + default: + break; + } + + if (*mode == RANGE) { + if (serd_model_has_index(model, &good[0], n_prefix, graph_search)) { + return good[0]; + } + + if (serd_model_has_index(model, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + // Not so good orderings that require filtering, but can + // still be constrained to a range + switch (sig) { + PAT_CASE(0x011, FILTER_RANGE, OSP, PSO, 1); + PAT_CASE(0x101, FILTER_RANGE, SPO, OPS, 1); + // SPO is always present, so 0x110 is never reached here + default: + break; + } + + if (*mode == FILTER_RANGE) { + if (serd_model_has_index(model, &good[0], n_prefix, graph_search)) { + return good[0]; + } + + if (serd_model_has_index(model, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + if (graph_search) { + *mode = FILTER_RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + } + + *mode = FILTER_ALL; + return DEFAULT_ORDER; +} + +SerdModel* +serd_model_new(SerdWorld* world, SerdModelFlags flags) +{ + SerdModel* model = (SerdModel*)calloc(1, sizeof(struct SerdModelImpl)); + + flags |= SERD_INDEX_SPO; // SPO index is mandatory + + model->world = world; + model->flags = flags; + + for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) { + const int* const ordering = orderings[i]; + const int* const g_ordering = orderings[i + (NUM_ORDERS / 2)]; + + if (flags & (1u << i)) { + model->indices[i] = zix_btree_new( + (ZixComparator)serd_triple_compare, (const void*)ordering, NULL); + if (flags & SERD_INDEX_GRAPHS) { + model->indices[i + (NUM_ORDERS / 2)] = zix_btree_new( + (ZixComparator)serd_quad_compare, (const void*)g_ordering, NULL); + } + } + } + + // Create end iterator + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + ZixBTreeIter* cur = zix_btree_end(model->indices[order]); + const SerdQuad pat = {0, 0, 0, 0}; + + model->end = serd_iter_new(model, cur, pat, order, ALL, 0); + + return model; +} + +SerdModel* +serd_model_copy(const SerdModel* model) +{ + if (!model) { + return NULL; + } + + SerdModel* copy = serd_model_new(model->world, model->flags); + + SerdRange* all = serd_model_all(model); + serd_model_add_range(copy, all); + serd_range_free(all); + + assert(serd_model_size(model) == serd_model_size(copy)); + return copy; +} + +SERD_API +bool +serd_model_equals(const SerdModel* a, const SerdModel* b) +{ + if (!a && !b) { + return true; + } + + if (!a || !b || serd_model_size(a) != serd_model_size(b)) { + return false; + } + + SerdRange* ra = serd_model_all(a); + SerdRange* rb = serd_model_all(b); + + bool result = true; + while (!serd_range_empty(ra) && !serd_range_empty(rb)) { + if (!serd_statement_equals(serd_range_front(ra), serd_range_front(rb))) { + result = false; + break; + } + + serd_range_next(ra); + serd_range_next(rb); + } + + result = result && serd_range_empty(ra) && serd_range_empty(rb); + + serd_range_free(ra); + serd_range_free(rb); + return result; +} + +static void +serd_model_drop_statement(SerdModel* model, SerdStatement* statement) + +{ + for (int i = 0; i < TUP_LEN; ++i) { + if (statement->nodes[i]) { + serd_nodes_deref(model->world->nodes, statement->nodes[i]); + } + } + + if (statement->cursor && statement->cursor->file) { + serd_nodes_deref(model->world->nodes, statement->cursor->file); + } + + serd_statement_free(statement); +} + +void +serd_model_free(SerdModel* model) +{ + if (!model) { + return; + } + + serd_iter_free(model->end); + + ZixBTree* index = + model->indices[model->indices[DEFAULT_GRAPH_ORDER] ? DEFAULT_GRAPH_ORDER + : DEFAULT_ORDER]; + + // Free statements + ZixBTreeIter* t = zix_btree_begin(index); + for (; !zix_btree_iter_is_end(t); zix_btree_iter_increment(t)) { + serd_model_drop_statement(model, (SerdStatement*)zix_btree_get(t)); + } + zix_btree_iter_free(t); + + // Free indices + for (unsigned o = 0; o < NUM_ORDERS; ++o) { + if (model->indices[o]) { + zix_btree_free(model->indices[o]); + } + } + + free(model); +} + +SerdWorld* +serd_model_world(SerdModel* model) +{ + return model->world; +} + +SerdModelFlags +serd_model_flags(const SerdModel* model) +{ + return model->flags; +} + +size_t +serd_model_size(const SerdModel* model) +{ + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + return zix_btree_size(model->indices[order]); +} + +bool +serd_model_empty(const SerdModel* model) +{ + return serd_model_size(model) == 0; +} + +SerdIter* +serd_model_begin(const SerdModel* model) +{ + if (serd_model_size(model) == 0) { + return serd_iter_copy(serd_model_end(model)); + } + + const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + ZixBTreeIter* cur = zix_btree_begin(model->indices[order]); + const SerdQuad pat = {0, 0, 0, 0}; + return serd_iter_new(model, cur, pat, order, ALL, 0); +} + +const SerdIter* +serd_model_end(const SerdModel* model) +{ + return model->end; +} + +SerdRange* +serd_model_all(const SerdModel* model) +{ + return serd_range_new(serd_model_begin(model), + serd_iter_copy(serd_model_end(model))); +} + +SerdIter* +serd_model_find(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + const SerdQuad pat = {s, p, o, g}; + if (!pat[0] && !pat[1] && !pat[2] && !pat[3]) { + return serd_model_begin(model); + } + + SearchMode mode = ALL; + int n_prefix = 0; + const SerdOrder index_order = + serd_model_best_index(model, pat, &mode, &n_prefix); + + ZixBTree* const db = model->indices[index_order]; + ZixBTreeIter* cur = NULL; + + if (mode == FILTER_ALL) { + // No prefix shared with an index at all, linear search (worst case) + cur = zix_btree_begin(db); + } else if (mode == FILTER_RANGE) { + /* Some prefix, but filtering still required. Build a search pattern + with only the prefix to find the lower bound in log time. */ + SerdQuad prefix_pat = {NULL, NULL, NULL, NULL}; + const int* const ordering = orderings[index_order]; + for (int i = 0; i < n_prefix; ++i) { + prefix_pat[ordering[i]] = pat[ordering[i]]; + } + zix_btree_lower_bound(db, prefix_pat, &cur); + } else { + // Ideal case, pattern matches an index with no filtering required + zix_btree_lower_bound(db, pat, &cur); + } + + if (zix_btree_iter_is_end(cur)) { + zix_btree_iter_free(cur); + return NULL; + } + + const SerdStatement* const key = (const SerdStatement*)zix_btree_get(cur); + if (!key || (mode == RANGE && !serd_statement_matches_quad(key, pat))) { + zix_btree_iter_free(cur); + return NULL; + } + + return serd_iter_new(model, cur, pat, index_order, mode, n_prefix); +} + +SerdRange* +serd_model_range(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if (!s && !p && !o && !g) { + return serd_range_new(serd_model_begin(model), + serd_iter_copy(serd_model_end(model))); + } + + SerdIter* begin = serd_model_find(model, s, p, o, g); + SerdIter* end = + begin + ? serd_iter_new( + model, NULL, begin->pat, begin->order, begin->mode, begin->n_prefix) + : NULL; + return serd_range_new(begin, end); +} + +const SerdNode* +serd_model_get(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + const SerdStatement* statement = serd_model_get_statement(model, s, p, o, g); + + if (statement) { + if (!s) { + return serd_statement_subject(statement); + } + + if (!p) { + return serd_statement_predicate(statement); + } + + if (!o) { + return serd_statement_object(statement); + } + + if (!g) { + return serd_statement_graph(statement); + } + } + + return NULL; +} + +const SerdStatement* +serd_model_get_statement(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if ((bool)s + (bool)p + (bool)o != 2 && + (bool)s + (bool)p + (bool)o + (bool)g != 3) { + return NULL; + } + + SerdIter* const i = serd_model_find(model, s, p, o, g); + if (i && i->cur) { + const SerdStatement* statement = serd_iter_get(i); + serd_iter_free(i); + return statement; + } + + return NULL; +} + +size_t +serd_model_count(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdRange* const range = serd_model_range(model, s, p, o, g); + size_t count = 0; + + for (; !serd_range_empty(range); serd_range_next(range)) { + ++count; + } + + serd_range_free(range); + return count; +} + +bool +serd_model_ask(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdIter* iter = serd_model_find(model, s, p, o, g); + const bool ret = iter && !zix_btree_iter_is_end(iter->cur); + serd_iter_free(iter); + return ret; +} + +static SerdCursor* +serd_model_intern_cursor(SerdModel* model, const SerdCursor* cursor) +{ + if (cursor) { + SerdCursor* copy = (SerdCursor*)calloc(1, sizeof(SerdCursor)); + + copy->file = serd_nodes_intern(model->world->nodes, cursor->file); + copy->line = cursor->line; + copy->col = cursor->col; + return copy; + } + + return NULL; +} + +SerdStatus +serd_model_add_internal(SerdModel* model, + const SerdCursor* cursor, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + SerdStatement* statement = (SerdStatement*)calloc(1, sizeof(SerdStatement)); + + statement->nodes[0] = s; + statement->nodes[1] = p; + statement->nodes[2] = o; + statement->nodes[3] = g; + statement->cursor = serd_model_intern_cursor(model, cursor); + + bool added = false; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (model->indices[i]) { + if (!zix_btree_insert(model->indices[i], statement)) { + added = true; + } else if (i == GSPO) { + break; // Statement already indexed + } + } + } + + ++model->version; + if (added) { + return SERD_SUCCESS; + } + + serd_model_drop_statement(model, statement); + return SERD_FAILURE; +} + +SerdStatus +serd_model_add(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g) +{ + if (!s || !p || !o) { + return SERD_LOG_ERROR(model->world, + SERD_ERR_BAD_ARG, + "attempt to add statement with NULL field\n"); + } + + return serd_model_add_internal(model, + NULL, + serd_nodes_intern(model->world->nodes, s), + serd_nodes_intern(model->world->nodes, p), + serd_nodes_intern(model->world->nodes, o), + serd_nodes_intern(model->world->nodes, g)); +} + +SerdStatus +serd_model_insert(SerdModel* model, const SerdStatement* statement) +{ + if (!statement) { + return SERD_FAILURE; + } + + SerdNodes* nodes = model->world->nodes; + return serd_model_add_internal( + model, + serd_statement_cursor(statement), + serd_nodes_intern(nodes, serd_statement_subject(statement)), + serd_nodes_intern(nodes, serd_statement_predicate(statement)), + serd_nodes_intern(nodes, serd_statement_object(statement)), + serd_nodes_intern(nodes, serd_statement_graph(statement))); +} + +SerdStatus +serd_model_add_range(SerdModel* model, SerdRange* range) +{ + SerdStatus st = SERD_SUCCESS; + for (; !st && !serd_range_empty(range); serd_range_next(range)) { + st = + serd_model_insert(model, (const SerdStatement*)serd_range_front(range)); + } + + return st; +} + +SerdStatus +serd_model_erase(SerdModel* model, SerdIter* iter) +{ + const SerdStatement* statement = serd_iter_get(iter); + + SerdStatement* removed = NULL; + for (int i = SPO; i <= GPOS; ++i) { + if (model->indices[i]) { + zix_btree_remove(model->indices[i], + statement, + (void**)&removed, + i == (int)iter->order ? &iter->cur : NULL); + } + } + serd_iter_scan_next(iter); + + serd_model_drop_statement(model, removed); + iter->version = ++model->version; + + return SERD_SUCCESS; +} + +SerdStatus +serd_model_erase_range(SerdModel* model, SerdRange* range) +{ + while (!serd_range_empty(range)) { + serd_model_erase(model, range->begin); + } + + return SERD_SUCCESS; +} diff --git a/src/model.h b/src/model.h new file mode 100644 index 00000000..ebf2faf9 --- /dev/null +++ b/src/model.h @@ -0,0 +1,41 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_MODEL_H +#define SERD_MODEL_H + +#include "serd/serd.h" +#include "zix/btree.h" + +#include <stdint.h> + +struct SerdModelImpl { + SerdWorld* world; ///< World this model is a part of + ZixBTree* indices[12]; ///< Trees of SordQuad + SerdIter* end; ///< End iterator (always the same) + uint64_t version; ///< Version incremented on every change + SerdModelFlags flags; ///< Active indices and features +}; + +SerdStatus +serd_model_add_internal(SerdModel* model, + const SerdCursor* cursor, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + +#endif // SERD_MODEL_H @@ -20,6 +20,7 @@ #include "exess/exess.h" #include "serd/serd.h" +#include <stdbool.h> #include <stddef.h> struct SerdNodeImpl { @@ -40,6 +41,20 @@ serd_node_buffer_c(const SerdNode* SERD_NONNULL node) return (const char*)(node + 1); } +static inline int +serd_node_wildcard_compare(const SerdNode* SERD_NULLABLE a, + const SerdNode* SERD_NULLABLE b) +{ + return (!a || !b) ? 0 : serd_node_compare(a, b); +} + +static inline bool +serd_node_pattern_match(const SerdNode* SERD_NULLABLE a, + const SerdNode* SERD_NULLABLE b) +{ + return !a || !b || serd_node_equals(a, b); +} + SerdNode* SERD_ALLOCATED serd_node_malloc(size_t n_bytes, SerdNodeFlags flags, SerdNodeType type); diff --git a/src/range.c b/src/range.c new file mode 100644 index 00000000..89465b9c --- /dev/null +++ b/src/range.c @@ -0,0 +1,326 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "range.h" + +#include "iter.h" +#include "model.h" +#include "world.h" + +#include "zix/common.h" +#include "zix/digest.h" +#include "zix/hash.h" + +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +typedef enum { NAMED, ANON_S, ANON_O, LIST_S, LIST_O } NodeStyle; + +static SerdStatus +write_range_statement(const SerdSink* sink, + const SerdModel* model, + ZixHash* list_subjects, + unsigned depth, + SerdStatementFlags flags, + const SerdStatement* statement); + +SerdRange* +serd_range_new(SerdIter* const begin, SerdIter* const end) +{ + SerdRange* range = (SerdRange*)malloc(sizeof(SerdRange)); + + range->begin = begin; + range->end = end; + + return range; +} + +SerdRange* +serd_range_copy(const SerdRange* range) +{ + SerdRange* copy = (SerdRange*)malloc(sizeof(SerdRange)); + + memcpy(copy, range, sizeof(SerdRange)); + copy->begin = serd_iter_copy(range->begin); + copy->end = serd_iter_copy(range->end); + + return copy; +} + +void +serd_range_free(SerdRange* range) +{ + if (range) { + serd_iter_free(range->begin); + serd_iter_free(range->end); + free(range); + } +} + +const SerdStatement* +serd_range_front(const SerdRange* range) +{ + return serd_iter_get(range->begin); +} + +bool +serd_range_equals(const SerdRange* lhs, const SerdRange* rhs) +{ + return (!lhs && !rhs) || + (lhs && rhs && serd_iter_equals(lhs->begin, rhs->begin) && + serd_iter_equals(lhs->end, rhs->end)); +} + +bool +serd_range_next(SerdRange* range) +{ + return serd_iter_next(range->begin); +} + +bool +serd_range_empty(const SerdRange* range) +{ + return !range || serd_iter_equals(range->begin, range->end); +} + +const SerdIter* +serd_range_cbegin(const SerdRange* range) +{ + return range->begin; +} + +const SerdIter* +serd_range_cend(const SerdRange* range) +{ + return range->end; +} + +SerdIter* +serd_range_begin(SerdRange* range) +{ + return range->begin; +} + +SerdIter* +serd_range_end(SerdRange* range) +{ + return range->end; +} + +static NodeStyle +get_node_style(const SerdModel* model, const SerdNode* node) +{ + if (serd_node_type(node) != SERD_BLANK) { + return NAMED; // Non-blank node can't be anonymous + } + + size_t n_as_object = 0; + SerdRange* range = serd_model_range(model, NULL, NULL, node, NULL); + for (; !serd_range_empty(range); serd_range_next(range), ++n_as_object) { + if (n_as_object == 1) { + serd_range_free(range); + return NAMED; // Blank node referred to several times + } + } + + serd_range_free(range); + + if (serd_model_ask(model, node, model->world->rdf_first, NULL, NULL) && + serd_model_ask(model, node, model->world->rdf_rest, NULL, NULL)) { + return n_as_object == 0 ? LIST_S : LIST_O; + } + + return n_as_object == 0 ? ANON_S : ANON_O; +} + +static uint32_t +ptr_hash(const void* ptr) +{ + return zix_digest_add_ptr(zix_digest_start(), ptr); +} + +static bool +ptr_equals(const void* a, const void* b) +{ + return *(const void* const*)a == *(const void* const*)b; +} + +static SerdStatus +write_pretty_range(const SerdSink* sink, + const unsigned depth, + const SerdStatementFlags flags, + const SerdModel* model, + SerdRange* range) +{ + (void)flags; + + ZixHash* list_subjects = zix_hash_new(ptr_hash, ptr_equals, sizeof(void*)); + + SerdStatus st = SERD_SUCCESS; + for (; !st && !serd_range_empty(range); serd_range_next(range)) { + st = write_range_statement( + sink, model, list_subjects, depth, 0, serd_range_front(range)); + } + + zix_hash_free(list_subjects); + + return st; +} + +static SerdStatus +write_list(const SerdSink* sink, + const SerdModel* model, + ZixHash* list_subjects, + const unsigned depth, + SerdStatementFlags flags, + const SerdNode* object) +{ + SerdStatus st = SERD_SUCCESS; + const SerdWorld* world = model->world; + const SerdNode* first = world->rdf_first; + const SerdNode* rest = world->rdf_rest; + const SerdNode* nil = world->rdf_nil; + + SerdIter* f = serd_model_find(model, object, first, NULL, NULL); + while (!st && f && !serd_node_equals(object, nil)) { + const SerdStatement* fs = serd_iter_get(f); + if (!fs) { + st = SERD_ERR_INTERNAL; + break; + } + + st = + write_range_statement(sink, model, list_subjects, depth + 1, flags, fs); + + serd_iter_free(f); + f = NULL; + flags = 0; + if (st) { + break; + } + + SerdIter* const r = serd_model_find(model, object, rest, NULL, NULL); + + const SerdStatement* const s = r ? serd_iter_get(r) : NULL; + const SerdNode* next = s ? serd_statement_object(s) : NULL; + + f = next ? serd_model_find(model, next, first, NULL, NULL) : NULL; + if (r && f) { + // This and next node are good, write rdf:next statement + st = serd_sink_write_statement(sink, 0, s); + object = next; + } else { + // Terminate malformed list + st = + serd_sink_write(sink, 0, object, rest, nil, serd_statement_graph(fs)); + f = NULL; + } + + serd_iter_free(r); + } + + serd_iter_free(f); + return st; +} + +static SerdStatus +write_range_statement(const SerdSink* sink, + const SerdModel* model, + ZixHash* list_subjects, + const unsigned depth, + SerdStatementFlags flags, + const SerdStatement* statement) +{ + if (!statement) { + return SERD_FAILURE; + } + + const SerdNode* subject = serd_statement_subject(statement); + const NodeStyle subject_style = get_node_style(model, subject); + + if (depth == 0 && (subject_style == ANON_O || subject_style == LIST_O)) { + return SERD_SUCCESS; // Skip subject that will be inlined somewhere + } + + if (subject_style == LIST_S && depth == 0 && + (serd_node_equals(serd_statement_predicate(statement), + model->world->rdf_first) || + (serd_node_equals(serd_statement_predicate(statement), + model->world->rdf_rest)))) { + return SERD_SUCCESS; + } + + if (subject_style == ANON_S) { + flags |= SERD_EMPTY_S; // Write anonymous subjects in "[] p o" style + } + + const SerdNode* object = serd_statement_object(statement); + const NodeStyle object_style = get_node_style(model, object); + SerdStatus st = SERD_SUCCESS; + if (subject_style == LIST_S && depth == 0) { + if (zix_hash_insert(list_subjects, &subject, NULL) != ZIX_STATUS_EXISTS) { + st = + write_list(sink, model, list_subjects, depth + 1, SERD_LIST_S, subject); + } + } + + if (object_style == ANON_O) { + flags |= SERD_ANON_O; + + SerdRange* range = serd_model_range(model, object, NULL, NULL, NULL); + st = st ? st : serd_sink_write_statement(sink, flags, statement); + st = st ? st : write_pretty_range(sink, depth + 1, 0, model, range); + st = st ? st : serd_sink_write_end(sink, object); + serd_range_free(range); + + return st; + } + + if (object_style == LIST_O) { + flags |= SERD_LIST_O; + st = st ? st : serd_sink_write_statement(sink, flags, statement); + st = st ? st : write_list(sink, model, list_subjects, depth + 1, 0, object); + return st; + } + + return serd_sink_write_statement(sink, flags, statement); +} + +SerdStatus +serd_range_serialise(const SerdRange* range, + const SerdSink* sink, + const SerdSerialisationFlags flags) +{ + SerdStatus st = SERD_SUCCESS; + if (serd_range_empty(range)) { + return st; + } + + SerdRange* const copy = serd_range_copy(range); + if (flags & SERD_NO_INLINE_OBJECTS) { + for (; !st && !serd_range_empty(copy); serd_range_next(copy)) { + const SerdStatement* const f = serd_range_front(copy); + st = f ? serd_sink_write_statement(sink, 0, f) : SERD_ERR_INTERNAL; + } + } else { + st = write_pretty_range(sink, 0, 0, range->begin->model, copy); + } + + serd_range_free(copy); + return st; +} diff --git a/src/range.h b/src/range.h new file mode 100644 index 00000000..cbfb3f54 --- /dev/null +++ b/src/range.h @@ -0,0 +1,35 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_RANGE_H +#define SERD_RANGE_H + +#include "serd/serd.h" + +#include <stdbool.h> + +struct SerdRangeImpl { + SerdIter* begin; ///< Iterator to first statement in range + SerdIter* end; ///< Iterator to end of range +}; + +SerdRange* +serd_range_new(SerdIter* begin, SerdIter* end); + +bool +serd_range_scan_next(SerdRange* range); + +#endif // SERD_RANGE_H diff --git a/src/serdi.c b/src/serdi.c index 2eea3919..0955079f 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -60,11 +60,12 @@ print_usage(const char* name, bool error) fprintf(os, " -b Fast bulk output for large serialisations.\n"); fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); fprintf(os, " -e Eat input one character at a time.\n"); - fprintf(os, " -f Keep full URIs in input (don't qualify).\n"); + fprintf(os, " -f Fast and loose mode (possibly ugly output).\n"); fprintf(os, " -h Display this help and exit.\n"); fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); fprintf(os, " -k BYTES Parser stack size.\n"); fprintf(os, " -l Lax (non-strict) parsing.\n"); + fprintf(os, " -m Build and serialise a model (no streaming).\n"); fprintf(os, " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"); fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); fprintf(os, " -q Suppress all output except data.\n"); @@ -144,7 +145,9 @@ main(int argc, char** argv) SerdWriterFlags writer_flags = 0; bool bulk_read = true; bool bulk_write = false; + bool no_inline = false; bool osyntax_set = false; + bool use_model = false; bool quiet = false; size_t stack_size = 4194304; const char* input_string = NULL; @@ -171,12 +174,15 @@ main(int argc, char** argv) } else if (argv[a][1] == 'e') { bulk_read = false; } else if (argv[a][1] == 'f') { + no_inline = true; writer_flags |= (SERD_WRITE_UNQUALIFIED | SERD_WRITE_UNRESOLVED); } else if (argv[a][1] == 'h') { return print_usage(argv[0], false); } else if (argv[a][1] == 'l') { reader_flags |= SERD_READ_LAX; writer_flags |= SERD_WRITE_LAX; + } else if (argv[a][1] == 'm') { + use_model = true; } else if (argv[a][1] == 'q') { quiet = true; } else if (argv[a][1] == 'v') { @@ -287,6 +293,9 @@ main(int argc, char** argv) } #endif + const SerdSerialisationFlags serialisation_flags = + no_inline ? SERD_NO_INLINE_OBJECTS : 0u; + const size_t block_size = bulk_write ? 4096u : 1u; SerdByteSink* const byte_sink = out_filename @@ -301,6 +310,20 @@ main(int argc, char** argv) SerdWriter* const writer = serd_writer_new(world, output_syntax, writer_flags, env, byte_sink); + SerdModel* model = NULL; + SerdSink* inserter = NULL; + const SerdSink* sink = NULL; + if (use_model) { + const SerdModelFlags flags = SERD_INDEX_SPO | + (input_has_graphs ? SERD_INDEX_GRAPHS : 0u) | + (no_inline ? 0u : SERD_INDEX_OPS); + model = serd_model_new(world, flags); + inserter = serd_inserter_new(model, env, NULL); + sink = inserter; + } else { + sink = serd_writer_sink(writer); + } + if (quiet) { serd_world_set_log_func(world, serd_quiet_error_func, NULL); } @@ -321,7 +344,7 @@ main(int argc, char** argv) input_syntax ? input_syntax : SERD_TRIG, reader_flags, env, - serd_writer_sink(writer), + sink, stack_size); serd_reader_add_blank_prefix(reader, add_prefix); @@ -358,7 +381,7 @@ main(int argc, char** argv) input_syntax, reader_flags, env, - serd_writer_sink(writer), + sink, stack_size, inputs[i], n_inputs > 1 ? prefix : add_prefix, @@ -368,6 +391,18 @@ main(int argc, char** argv) } free(prefix); + if (st <= SERD_FAILURE && use_model) { + const SerdSink* writer_sink = serd_writer_sink(writer); + SerdRange* range = serd_model_all(model); + + serd_env_write_prefixes(env, writer_sink); + + st = serd_range_serialise(range, writer_sink, serialisation_flags); + serd_range_free(range); + } + + serd_sink_free(inserter); + serd_model_free(model); serd_writer_free(writer); serd_node_free(input_name); serd_env_free(env); diff --git a/src/statement.c b/src/statement.c index c9bac440..a080a99f 100644 --- a/src/statement.c +++ b/src/statement.c @@ -16,6 +16,56 @@ #include "statement.h" +#include "cursor.h" +#include "node.h" + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +SerdStatement* +serd_statement_new(const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g, + const SerdCursor* cursor) +{ + SerdStatement* statement = (SerdStatement*)malloc(sizeof(SerdStatement)); + if (statement) { + statement->nodes[0] = s; + statement->nodes[1] = p; + statement->nodes[2] = o; + statement->nodes[3] = g; + statement->cursor = serd_cursor_copy(cursor); + } + return statement; +} + +SerdStatement* +serd_statement_copy(const SerdStatement* statement) +{ + if (!statement) { + return NULL; + } + + SerdStatement* copy = (SerdStatement*)malloc(sizeof(SerdStatement)); + memcpy(copy, statement, sizeof(SerdStatement)); + if (statement->cursor) { + copy->cursor = (SerdCursor*)malloc(sizeof(SerdCursor)); + memcpy(copy->cursor, statement->cursor, sizeof(SerdCursor)); + } + return copy; +} + +void +serd_statement_free(SerdStatement* statement) +{ + if (statement) { + free(statement->cursor); + free(statement); + } +} + const SerdNode* serd_statement_node(const SerdStatement* statement, SerdField field) { @@ -51,3 +101,31 @@ serd_statement_cursor(const SerdStatement* statement) { return statement->cursor; } + +bool +serd_statement_equals(const SerdStatement* a, const SerdStatement* b) +{ + return (a == b || (a && b && serd_node_equals(a->nodes[0], b->nodes[0]) && + serd_node_equals(a->nodes[1], b->nodes[1]) && + serd_node_equals(a->nodes[2], b->nodes[2]) && + serd_node_equals(a->nodes[3], b->nodes[3]))); +} + +bool +serd_statement_matches(const SerdStatement* statement, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph) +{ + return (serd_node_pattern_match(statement->nodes[0], subject) && + serd_node_pattern_match(statement->nodes[1], predicate) && + serd_node_pattern_match(statement->nodes[2], object) && + serd_node_pattern_match(statement->nodes[3], graph)); +} + +bool +serd_statement_matches_quad(const SerdStatement* statement, const SerdQuad quad) +{ + return serd_statement_matches(statement, quad[0], quad[1], quad[2], quad[3]); +} diff --git a/src/statement.h b/src/statement.h index a5baaa6a..917a2692 100644 --- a/src/statement.h +++ b/src/statement.h @@ -19,6 +19,8 @@ #include "serd/serd.h" +#include <stdbool.h> + /** Quad of nodes (a statement), or a quad pattern. @@ -31,4 +33,9 @@ struct SerdStatementImpl { SerdCursor* cursor; }; +SERD_PURE_FUNC +bool +serd_statement_matches_quad(const SerdStatement* statement, + const SerdQuad quad); + #endif // SERD_STATEMENT_H diff --git a/src/string.c b/src/string.c index 4c6a1135..4461b058 100644 --- a/src/string.c +++ b/src/string.c @@ -46,6 +46,8 @@ serd_strerror(SerdStatus status) return "Invalid syntax"; case SERD_ERR_BAD_ARG: return "Invalid argument"; + case SERD_ERR_BAD_ITER: + return "Invalid iterator"; case SERD_ERR_NOT_FOUND: return "Not found"; case SERD_ERR_ID_CLASH: diff --git a/test/.clang-tidy b/test/.clang-tidy index b7dd752b..8e55b93e 100644 --- a/test/.clang-tidy +++ b/test/.clang-tidy @@ -10,6 +10,7 @@ Checks: > -clang-analyzer-nullability.NullabilityBase, -clang-analyzer-nullability.NullableDereferenced, -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, + -clang-analyzer-valist.Uninitialized, -cppcoreguidelines-avoid-non-const-global-variables, -hicpp-signed-bitwise, -llvmlibc-*, diff --git a/test/meson.build b/test/meson.build index 79ffc85d..84045225 100644 --- a/test/meson.build +++ b/test/meson.build @@ -6,12 +6,15 @@ unit_tests = [ 'cursor', 'env', 'free_null', + 'model', 'node', 'node_syntax', 'nodes', 'overflow', 'read_chunk', 'reader_writer', + 'sink', + 'statement', 'string', 'terse_write', 'uri', diff --git a/test/run_test_suite.py b/test/run_test_suite.py index d25fb3a1..05dc81ca 100755 --- a/test/run_test_suite.py +++ b/test/run_test_suite.py @@ -208,6 +208,23 @@ def _file_equals(patha, pathb): return _show_diff(fa.readlines(), fb.readlines(), patha, pathb) +def _file_lines_equal(patha, pathb, subst_from="", subst_to=""): + import io + + for path in (patha, pathb): + if not os.access(path, os.F_OK): + sys.stderr.write("error: missing file %s" % path) + return False + + la = sorted(set(io.open(patha, encoding="utf-8").readlines())) + lb = sorted(set(io.open(pathb, encoding="utf-8").readlines())) + if la != lb: + _show_diff(la, lb, patha, pathb) + return False + + return True + + def test_suite( manifest_path, base_uri, @@ -306,6 +323,18 @@ def test_suite( command_prefix, ) + # Run model test for positive test (must succeed) + out_filename = os.path.join(out_test_dir, test_name + ".model.out") + + with open(out_filename, "w") as stdout: + proc = subprocess.run([command[0]] + ['-m'] + command[1:], + check=True, + stdout=stdout) + + if proc.returncode == 0 and ((mf + 'result') in model[test]): + if not _file_lines_equal(check_path, out_filename): + results.n_failures += 1 + else: # Negative test with open(out_filename, "w") as stdout: with tempfile.TemporaryFile() as stderr: diff --git a/test/test_free_null.c b/test/test_free_null.c index a101379a..aee79f0e 100644 --- a/test/test_free_null.c +++ b/test/test_free_null.c @@ -33,6 +33,10 @@ main(void) serd_reader_free(NULL); serd_writer_free(NULL); serd_nodes_free(NULL); + serd_model_free(NULL); + serd_statement_free(NULL); + serd_iter_free(NULL); + serd_range_free(NULL); serd_cursor_free(NULL); return 0; diff --git a/test/test_model.c b/test/test_model.c new file mode 100644 index 00000000..cd1728e8 --- /dev/null +++ b/test/test_model.c @@ -0,0 +1,900 @@ +/* + Copyright 2011-2018 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#undef NDEBUG + +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "serd/serd.h" + +#define WILDCARD_NODE NULL + +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define RDF_FIRST NS_RDF "first" +#define RDF_REST NS_RDF "rest" +#define RDF_NIL NS_RDF "nil" + +#define N_OBJECTS_PER 2U + +typedef const SerdNode* Quad[4]; + +typedef struct { + Quad query; + int expected_num_results; +} QueryTest; + +static const SerdNode* +manage(SerdWorld* world, SerdNode* node) +{ + return serd_nodes_manage(serd_world_nodes(world), node); +} + +static const SerdNode* +uri(SerdWorld* world, const unsigned num) +{ + char str[] = "eg:0000000000"; + snprintf(str + 3, 11, "%03u", num); + return manage(world, serd_new_uri(SERD_STATIC_STRING(str))); +} + +static int +generate(SerdWorld* world, + SerdModel* model, + size_t n_quads, + const SerdNode* graph) +{ + SerdNodes* nodes = serd_world_nodes(world); + + for (unsigned i = 0; i < n_quads; ++i) { + unsigned num = (i * N_OBJECTS_PER) + 1U; + + const SerdNode* ids[2 + N_OBJECTS_PER]; + for (unsigned j = 0; j < 2 + N_OBJECTS_PER; ++j) { + ids[j] = uri(world, num++); + } + + for (unsigned j = 0; j < N_OBJECTS_PER; ++j) { + assert(!serd_model_add(model, ids[0], ids[1], ids[2 + j], graph)); + } + } + + // Add some literals + + // (98 4 "hello") and (98 4 "hello"^^<5>) + const SerdNode* hello = + manage(world, serd_new_string(SERD_STATIC_STRING("hello"))); + + const SerdNode* hello_gb = + manage(world, + serd_new_plain_literal(SERD_STATIC_STRING("hello"), + SERD_STATIC_STRING("en-gb"))); + + const SerdNode* hello_us = + manage(world, + serd_new_plain_literal(SERD_STATIC_STRING("hello"), + SERD_STATIC_STRING("en-us"))); + + const SerdNode* hello_t4 = serd_nodes_manage( + nodes, + serd_new_typed_literal(SERD_STATIC_STRING("hello"), + serd_node_string_view(uri(world, 4)))); + + const SerdNode* hello_t5 = serd_nodes_manage( + nodes, + serd_new_typed_literal(SERD_STATIC_STRING("hello"), + serd_node_string_view(uri(world, 5)))); + + assert(!serd_model_add(model, uri(world, 98), uri(world, 4), hello, graph)); + assert( + !serd_model_add(model, uri(world, 98), uri(world, 4), hello_t5, graph)); + + // (96 4 "hello"^^<4>) and (96 4 "hello"^^<5>) + assert( + !serd_model_add(model, uri(world, 96), uri(world, 4), hello_t4, graph)); + assert( + !serd_model_add(model, uri(world, 96), uri(world, 4), hello_t5, graph)); + + // (94 5 "hello") and (94 5 "hello"@en-gb) + assert(!serd_model_add(model, uri(world, 94), uri(world, 5), hello, graph)); + assert( + !serd_model_add(model, uri(world, 94), uri(world, 5), hello_gb, graph)); + + // (92 6 "hello"@en-us) and (92 6 "hello"@en-gb) + assert( + !serd_model_add(model, uri(world, 92), uri(world, 6), hello_us, graph)); + assert( + !serd_model_add(model, uri(world, 92), uri(world, 6), hello_gb, graph)); + + // (14 6 "bonjour"@fr) and (14 6 "salut"@fr) + const SerdNode* const bonjour = + manage(world, + serd_new_plain_literal(SERD_STATIC_STRING("bonjour"), + SERD_STATIC_STRING("fr"))); + + const SerdNode* const salut = + manage(world, + serd_new_plain_literal(SERD_STATIC_STRING("salut"), + SERD_STATIC_STRING("fr"))); + + assert(!serd_model_add(model, uri(world, 14), uri(world, 6), bonjour, graph)); + assert(!serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)); + + // Attempt to add duplicates + assert(serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)); + + // Add a blank node subject + const SerdNode* ablank = + manage(world, serd_new_blank(SERD_STATIC_STRING("ablank"))); + + assert(!serd_model_add(model, ablank, uri(world, 6), salut, graph)); + + // Add statement with URI object + assert(!serd_model_add(model, ablank, uri(world, 6), uri(world, 7), graph)); + + return EXIT_SUCCESS; +} + +static int +test_read(SerdWorld* world, + SerdModel* model, + const SerdNode* g, + const unsigned n_quads) +{ + SerdIter* iter = serd_model_begin(model); + const SerdStatement* prev = NULL; + for (; !serd_iter_equals(iter, serd_model_end(model)); serd_iter_next(iter)) { + const SerdStatement* statement = serd_iter_get(iter); + assert(statement); + assert(serd_statement_subject(statement)); + assert(serd_statement_predicate(statement)); + assert(serd_statement_object(statement)); + assert(!serd_statement_equals(statement, prev)); + assert(!serd_statement_equals(prev, statement)); + prev = statement; + } + + // Attempt to increment past end + assert(serd_iter_next(iter)); + serd_iter_free(iter); + + const SerdStringView s = SERD_STATIC_STRING("hello"); + + const SerdNode* plain_hello = manage(world, serd_new_string(s)); + + const SerdNode* type4_hello = manage( + world, serd_new_typed_literal(s, serd_node_string_view(uri(world, 4)))); + + const SerdNode* type5_hello = manage( + world, serd_new_typed_literal(s, serd_node_string_view(uri(world, 5)))); + + const SerdNode* gb_hello = + manage(world, serd_new_plain_literal(s, SERD_STATIC_STRING("en-gb"))); + + const SerdNode* us_hello = + manage(world, serd_new_plain_literal(s, SERD_STATIC_STRING("en-us"))); + +#define NUM_PATTERNS 18 + + QueryTest patterns[NUM_PATTERNS] = { + {{NULL, NULL, NULL}, (int)(n_quads * N_OBJECTS_PER) + 12}, + {{uri(world, 1), WILDCARD_NODE, WILDCARD_NODE}, 2}, + {{uri(world, 9), uri(world, 9), uri(world, 9)}, 0}, + {{uri(world, 1), uri(world, 2), uri(world, 4)}, 1}, + {{uri(world, 3), uri(world, 4), WILDCARD_NODE}, 2}, + {{WILDCARD_NODE, uri(world, 2), uri(world, 4)}, 1}, + {{WILDCARD_NODE, WILDCARD_NODE, uri(world, 4)}, 1}, + {{uri(world, 1), WILDCARD_NODE, WILDCARD_NODE}, 2}, + {{uri(world, 1), WILDCARD_NODE, uri(world, 4)}, 1}, + {{WILDCARD_NODE, uri(world, 2), WILDCARD_NODE}, 2}, + {{uri(world, 98), uri(world, 4), plain_hello}, 1}, + {{uri(world, 98), uri(world, 4), type5_hello}, 1}, + {{uri(world, 96), uri(world, 4), type4_hello}, 1}, + {{uri(world, 96), uri(world, 4), type5_hello}, 1}, + {{uri(world, 94), uri(world, 5), plain_hello}, 1}, + {{uri(world, 94), uri(world, 5), gb_hello}, 1}, + {{uri(world, 92), uri(world, 6), gb_hello}, 1}, + {{uri(world, 92), uri(world, 6), us_hello}, 1}}; + + Quad match = {uri(world, 1), uri(world, 2), uri(world, 4), g}; + assert(serd_model_ask(model, match[0], match[1], match[2], match[3])); + + Quad nomatch = {uri(world, 1), uri(world, 2), uri(world, 9), g}; + assert( + !serd_model_ask(model, nomatch[0], nomatch[1], nomatch[2], nomatch[3])); + + assert(!serd_model_get(model, NULL, NULL, uri(world, 3), g)); + assert(!serd_model_get(model, uri(world, 1), uri(world, 99), NULL, g)); + + assert(serd_node_equals( + serd_model_get(model, uri(world, 1), uri(world, 2), NULL, g), + uri(world, 3))); + assert(serd_node_equals( + serd_model_get(model, uri(world, 1), NULL, uri(world, 3), g), + uri(world, 2))); + assert(serd_node_equals( + serd_model_get(model, NULL, uri(world, 2), uri(world, 3), g), + uri(world, 1))); + if (g) { + assert(serd_node_equals( + serd_model_get(model, uri(world, 1), uri(world, 2), uri(world, 3), NULL), + g)); + } + + for (unsigned i = 0; i < NUM_PATTERNS; ++i) { + QueryTest test = patterns[i]; + Quad pat = {test.query[0], test.query[1], test.query[2], g}; + + SerdRange* range = serd_model_range(model, pat[0], pat[1], pat[2], pat[3]); + int num_results = 0; + for (; !serd_range_empty(range); serd_range_next(range)) { + ++num_results; + + const SerdStatement* first = serd_range_front(range); + assert(first); + assert(serd_statement_matches(first, pat[0], pat[1], pat[2], pat[3])); + } + + serd_range_free(range); + + assert(num_results == test.expected_num_results); + } + + // Query blank node subject + + const SerdNode* ablank = + manage(world, serd_new_blank(SERD_STATIC_STRING("ablank"))); + + Quad pat = {ablank, 0, 0}; + int num_results = 0; + SerdRange* range = serd_model_range(model, pat[0], pat[1], pat[2], pat[3]); + for (; !serd_range_empty(range); serd_range_next(range)) { + ++num_results; + const SerdStatement* statement = serd_range_front(range); + assert(serd_statement_matches(statement, pat[0], pat[1], pat[2], pat[3])); + } + serd_range_free(range); + + assert(num_results == 2); + + // Test nested queries + const SerdNode* last_subject = 0; + range = serd_model_range(model, NULL, NULL, NULL, NULL); + for (; !serd_range_empty(range); serd_range_next(range)) { + const SerdStatement* statement = serd_range_front(range); + const SerdNode* subject = serd_statement_subject(statement); + if (subject == last_subject) { + continue; + } + + Quad subpat = {subject, 0, 0}; + SerdRange* subrange = + serd_model_range(model, subpat[0], subpat[1], subpat[2], subpat[3]); + const SerdStatement* substatement = serd_range_front(subrange); + uint64_t num_sub_results = 0; + assert(serd_statement_subject(substatement) == subject); + for (; !serd_range_empty(subrange); serd_range_next(subrange)) { + assert(serd_statement_matches(serd_range_front(subrange), + subpat[0], + subpat[1], + subpat[2], + subpat[3])); + ++num_sub_results; + } + serd_range_free(subrange); + assert(num_sub_results == N_OBJECTS_PER); + + uint64_t count = serd_model_count(model, subject, 0, 0, 0); + assert(count == num_sub_results); + + last_subject = subject; + } + serd_range_free(range); + + return 0; +} + +static SerdStatus +expected_error(void* handle, const SerdLogEntry* entry) +{ + (void)handle; + + fprintf(stderr, "expected: "); + vfprintf(stderr, entry->fmt, *entry->args); + return SERD_SUCCESS; +} + +static int +test_free_null(SerdWorld* world, const unsigned n_quads) +{ + (void)world; + (void)n_quads; + + serd_model_free(NULL); // Shouldn't crash + return 0; +} + +static int +test_get_world(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + assert(serd_model_world(model) == world); + serd_model_free(model); + return 0; +} + +static int +test_get_flags(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + const SerdModelFlags flags = SERD_INDEX_OPS | SERD_INDEX_GRAPHS; + SerdModel* model = serd_model_new(world, flags); + assert(serd_model_flags(model) == (SERD_INDEX_SPO | flags)); + serd_model_free(model); + return 0; +} + +static int +test_all_begin(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + SerdRange* all = serd_model_all(model); + SerdIter* begin = serd_model_find(model, NULL, NULL, NULL, NULL); + assert(serd_iter_equals(serd_range_begin(all), begin)); + assert(serd_iter_equals(serd_range_cbegin(all), begin)); + + serd_range_free(all); + serd_iter_free(begin); + serd_model_free(model); + return 0; +} + +static int +test_add_null(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + + serd_world_set_log_func(world, expected_error, NULL); + + assert(serd_model_add(model, 0, 0, 0, 0)); + assert(serd_model_add(model, uri(world, 1), 0, 0, 0)); + assert(serd_model_add(model, uri(world, 1), uri(world, 2), 0, 0)); + assert(serd_model_empty(model)); + + serd_model_free(model); + return 0; +} + +static int +test_add_with_iterator(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + + serd_world_set_log_func(world, expected_error, NULL); + assert( + !serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + + // Add a statement with an active iterator + SerdIter* iter = serd_model_begin(model); + assert( + !serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 4), 0)); + + // Check that iterator has been invalidated + assert(!serd_iter_get(iter)); + assert(serd_iter_next(iter)); + + serd_iter_free(iter); + serd_model_free(model); + return 0; +} + +static int +test_erase_with_iterator(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + + serd_world_set_log_func(world, expected_error, NULL); + assert( + !serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + assert( + !serd_model_add(model, uri(world, 4), uri(world, 5), uri(world, 6), 0)); + + // Erase a statement with an active iterator + SerdIter* iter1 = serd_model_begin(model); + SerdIter* iter2 = serd_model_begin(model); + assert(!serd_model_erase(model, iter1)); + + // Check that erased iterator points to the next statement + const SerdStatement* const s1 = serd_iter_get(iter1); + assert(s1); + assert( + serd_statement_matches(s1, uri(world, 4), uri(world, 5), uri(world, 6), 0)); + + // Check that other iterator has been invalidated + assert(!serd_iter_get(iter2)); + assert(serd_iter_next(iter2)); + + serd_iter_free(iter2); + serd_iter_free(iter1); + serd_model_free(model); + return 0; +} + +static int +test_add_erase(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO | SERD_INDEX_GRAPHS); + + // Add (s p "hello") + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* hello = + manage(world, serd_new_string(SERD_STATIC_STRING("hello"))); + + assert(!serd_model_add(model, s, p, hello, 0)); + assert(serd_model_ask(model, s, p, hello, 0)); + + // Add (s p "hi") + const SerdNode* hi = manage(world, serd_new_string(SERD_STATIC_STRING("hi"))); + assert(!serd_model_add(model, s, p, hi, NULL)); + assert(serd_model_ask(model, s, p, hi, 0)); + + // Erase (s p "hi") + SerdIter* iter = serd_model_find(model, s, p, hi, NULL); + assert(!serd_model_erase(model, iter)); + assert(serd_model_size(model) == 1); + serd_iter_free(iter); + + // Check that erased statement can not be found + SerdRange* empty = serd_model_range(model, s, p, hi, NULL); + assert(serd_range_empty(empty)); + serd_range_free(empty); + + serd_model_free(model); + return 0; +} + +static int +test_erase_all(SerdWorld* world, const unsigned n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + generate(world, model, n_quads, NULL); + + SerdIter* iter = serd_model_begin(model); + while (!serd_iter_equals(iter, serd_model_end(model))) { + assert(!serd_model_erase(model, iter)); + } + + serd_iter_free(iter); + serd_model_free(model); + return 0; +} + +static int +test_copy(SerdWorld* world, const unsigned n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + generate(world, model, n_quads, NULL); + + SerdModel* copy = serd_model_copy(model); + assert(serd_model_equals(model, copy)); + + serd_model_free(model); + serd_model_free(copy); + return 0; +} + +static int +test_equals(SerdWorld* world, const unsigned n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + generate(world, model, n_quads, NULL); + serd_model_add( + model, uri(world, 0), uri(world, 1), uri(world, 2), uri(world, 3)); + + assert(serd_model_equals(NULL, NULL)); + assert(!serd_model_equals(NULL, model)); + assert(!serd_model_equals(model, NULL)); + + SerdModel* empty = serd_model_new(world, SERD_INDEX_SPO); + assert(!serd_model_equals(model, empty)); + + SerdModel* different = serd_model_new(world, SERD_INDEX_SPO); + generate(world, different, n_quads, NULL); + serd_model_add( + different, uri(world, 1), uri(world, 1), uri(world, 2), uri(world, 3)); + + assert(serd_model_size(model) == serd_model_size(different)); + assert(!serd_model_equals(model, different)); + + serd_model_free(model); + serd_model_free(empty); + serd_model_free(different); + return 0; +} + +static int +test_find_past_end(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* o = uri(world, 3); + assert(!serd_model_add(model, s, p, o, 0)); + assert(serd_model_ask(model, s, p, o, 0)); + + const SerdNode* huge = uri(world, 999); + SerdRange* range = serd_model_range(model, huge, huge, huge, 0); + assert(serd_range_empty(range)); + + serd_range_free(range); + serd_model_free(model); + return 0; +} + +static int +test_range(SerdWorld* world, const unsigned n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + generate(world, model, n_quads, NULL); + + SerdRange* range1 = serd_model_all(model); + SerdRange* range2 = serd_model_all(model); + + assert(!serd_range_empty(range1)); + assert(serd_range_empty(NULL)); + + assert(!serd_range_equals(range1, NULL)); + assert(!serd_range_equals(NULL, range1)); + assert(serd_range_equals(range1, range2)); + + assert(serd_iter_equals(serd_range_begin(range1), serd_range_begin(range2))); + assert( + serd_iter_equals(serd_range_cbegin(range1), serd_range_cbegin(range2))); + assert(serd_iter_equals(serd_range_end(range1), serd_range_end(range2))); + assert(serd_iter_equals(serd_range_cend(range1), serd_range_cend(range2))); + + assert(!serd_range_next(range2)); + assert(!serd_range_equals(range1, range2)); + + serd_range_free(range2); + serd_range_free(range1); + serd_model_free(model); + + return 0; +} + +static int +test_iter_comparison(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + + assert(serd_iter_equals(serd_iter_copy(NULL), NULL)); + + serd_world_set_log_func(world, expected_error, NULL); + assert( + !serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + + // Add a statement with an active iterator + SerdIter* iter1 = serd_model_begin(model); + SerdIter* iter2 = serd_model_begin(model); + assert(serd_iter_equals(iter1, iter2)); + + serd_iter_next(iter1); + assert(!serd_iter_equals(iter1, iter2)); + + const SerdIter* end = serd_model_end(model); + assert(serd_iter_equals(iter1, end)); + + serd_iter_free(iter2); + serd_iter_free(iter1); + serd_model_free(model); + return 0; +} + +static int +test_triple_index_read(SerdWorld* world, const unsigned n_quads) +{ + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = serd_model_new(world, (1u << i)); + generate(world, model, n_quads, 0); + assert(!test_read(world, model, 0, n_quads)); + serd_model_free(model); + } + + return 0; +} + +static int +test_quad_index_read(SerdWorld* world, const unsigned n_quads) +{ + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = serd_model_new(world, (1u << i) | SERD_INDEX_GRAPHS); + const SerdNode* graph = uri(world, 42); + generate(world, model, n_quads, graph); + assert(!test_read(world, model, graph, n_quads)); + serd_model_free(model); + } + + return 0; +} + +static int +test_remove_graph(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO | SERD_INDEX_GRAPHS); + + // Generate a couple of graphs + const SerdNode* graph42 = uri(world, 42); + const SerdNode* graph43 = uri(world, 43); + generate(world, model, 1, graph42); + generate(world, model, 1, graph43); + + // Remove one graph via range + SerdRange* range = serd_model_range(model, NULL, NULL, NULL, graph43); + SerdStatus st = serd_model_erase_range(model, range); + assert(!st); + serd_range_free(range); + + // Erase the first tuple (an element in the default graph) + SerdIter* iter = serd_model_begin(model); + assert(!serd_model_erase(model, iter)); + serd_iter_free(iter); + + // Ensure only the other graph is left + Quad pat = {0, 0, 0, graph42}; + for (iter = serd_model_begin(model); + !serd_iter_equals(iter, serd_model_end(model)); + serd_iter_next(iter)) { + const SerdStatement* const s = serd_iter_get(iter); + assert(s); + assert(serd_statement_matches(s, pat[0], pat[1], pat[2], pat[3])); + } + serd_iter_free(iter); + + serd_model_free(model); + return 0; +} + +static int +test_default_graph(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO | SERD_INDEX_GRAPHS); + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* o = uri(world, 3); + const SerdNode* g1 = uri(world, 101); + const SerdNode* g2 = uri(world, 102); + + // Insert the same statement into two graphs + assert(!serd_model_add(model, s, p, o, g1)); + assert(!serd_model_add(model, s, p, o, g2)); + + // Ensure we only see statement once in the default graph + assert(serd_model_count(model, s, p, o, NULL) == 1); + + serd_model_free(model); + return 0; +} + +static int +test_write_bad_list(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO | SERD_INDEX_GRAPHS); + SerdNodes* nodes = serd_nodes_new(); + + const SerdNode* s = manage(world, serd_new_uri(SERD_STATIC_STRING("urn:s"))); + const SerdNode* p = manage(world, serd_new_uri(SERD_STATIC_STRING("urn:p"))); + const SerdNode* list1 = + manage(world, serd_new_blank(SERD_STATIC_STRING("l1"))); + const SerdNode* list2 = + manage(world, serd_new_blank(SERD_STATIC_STRING("l2"))); + const SerdNode* nofirst = + manage(world, serd_new_blank(SERD_STATIC_STRING("nof"))); + const SerdNode* norest = + manage(world, serd_new_blank(SERD_STATIC_STRING("nor"))); + const SerdNode* pfirst = + manage(world, serd_new_uri(SERD_STATIC_STRING(RDF_FIRST))); + const SerdNode* prest = + manage(world, serd_new_uri(SERD_STATIC_STRING(RDF_REST))); + const SerdNode* val1 = + manage(world, serd_new_string(SERD_STATIC_STRING("a"))); + const SerdNode* val2 = + manage(world, serd_new_string(SERD_STATIC_STRING("b"))); + + // List where second node has no rdf:first + serd_model_add(model, s, p, list1, NULL); + serd_model_add(model, list1, pfirst, val1, NULL); + serd_model_add(model, list1, prest, nofirst, NULL); + + // List where second node has no rdf:rest + serd_model_add(model, s, p, list2, NULL); + serd_model_add(model, list2, pfirst, val1, NULL); + serd_model_add(model, list2, prest, norest, NULL); + serd_model_add(model, norest, pfirst, val2, NULL); + + SerdBuffer buffer = {NULL, 0}; + SerdEnv* env = serd_env_new(SERD_EMPTY_STRING()); + SerdByteSink* out = serd_byte_sink_new_buffer(&buffer); + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, out); + + SerdRange* all = serd_model_all(model); + serd_range_serialise(all, serd_writer_sink(writer), 0); + serd_range_free(all); + + serd_writer_finish(writer); + const char* str = serd_buffer_sink_finish(&buffer); + const char* expected = "<urn:s>\n" + " <urn:p> (\n" + " \"a\"\n" + " ) , (\n" + " \"a\"\n" + " \"b\"\n" + " ) .\n"; + + assert(!strcmp(str, expected)); + + free(buffer.buf); + serd_writer_free(writer); + serd_byte_sink_free(out); + serd_model_free(model); + serd_env_free(env); + serd_nodes_free(nodes); + return 0; +} + +typedef struct { + size_t n_written; + size_t max_successes; +} FailingWriteFuncState; + +/// Write function that fails after a certain number of writes +static size_t +failing_write_func(const void* buf, size_t size, size_t nmemb, void* stream) +{ + (void)buf; + (void)size; + (void)nmemb; + + FailingWriteFuncState* state = (FailingWriteFuncState*)stream; + + return (++state->n_written > state->max_successes) ? 0 : nmemb; +} + +static int +test_write_error_in_list(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + serd_world_set_log_func(world, expected_error, NULL); + + SerdModel* model = serd_model_new(world, SERD_INDEX_SPO); + SerdNodes* nodes = serd_nodes_new(); + + const SerdNode* s = manage(world, serd_new_uri(SERD_STATIC_STRING("urn:s"))); + const SerdNode* p = manage(world, serd_new_uri(SERD_STATIC_STRING("urn:p"))); + const SerdNode* l1 = manage(world, serd_new_blank(SERD_STATIC_STRING("l1"))); + + const SerdNode* one = manage(world, serd_new_integer(1, NULL)); + const SerdNode* l2 = manage(world, serd_new_blank(SERD_STATIC_STRING("l2"))); + const SerdNode* two = manage(world, serd_new_integer(2, NULL)); + + const SerdNode* rdf_first = + manage(world, serd_new_uri(SERD_STATIC_STRING(RDF_FIRST))); + const SerdNode* rdf_rest = + manage(world, serd_new_uri(SERD_STATIC_STRING(RDF_REST))); + const SerdNode* rdf_nil = + manage(world, serd_new_uri(SERD_STATIC_STRING(RDF_NIL))); + + serd_model_add(model, s, p, l1, NULL); + serd_model_add(model, l1, rdf_first, one, NULL); + serd_model_add(model, l1, rdf_rest, l2, NULL); + serd_model_add(model, l2, rdf_first, two, NULL); + serd_model_add(model, l2, rdf_rest, rdf_nil, NULL); + + SerdEnv* env = serd_env_new(SERD_EMPTY_STRING()); + + for (size_t max_successes = 0; max_successes < 21; ++max_successes) { + FailingWriteFuncState state = {0, max_successes}; + SerdByteSink* out = + serd_byte_sink_new_function(failing_write_func, &state, 1); + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, out); + + const SerdSink* const sink = serd_writer_sink(writer); + SerdRange* const all = serd_model_all(model); + const SerdStatus st = serd_range_serialise(all, sink, 0); + serd_range_free(all); + + assert(st == SERD_ERR_BAD_WRITE); + + serd_writer_free(writer); + serd_byte_sink_free(out); + } + + serd_env_free(env); + serd_model_free(model); + serd_nodes_free(nodes); + return 0; +} + +int +main(void) +{ + static const unsigned n_quads = 300; + + serd_model_free(NULL); // Shouldn't crash + + typedef int (*TestFunc)(SerdWorld*, unsigned); + + const TestFunc tests[] = {test_free_null, + test_get_world, + test_get_flags, + test_all_begin, + test_add_null, + test_add_with_iterator, + test_erase_with_iterator, + test_add_erase, + test_erase_all, + test_copy, + test_equals, + test_find_past_end, + test_range, + test_iter_comparison, + test_triple_index_read, + test_quad_index_read, + test_remove_graph, + test_default_graph, + test_write_bad_list, + test_write_error_in_list, + NULL}; + + SerdWorld* world = serd_world_new(); + int ret = 0; + + for (const TestFunc* t = tests; *t; ++t) { + serd_world_set_log_func(world, NULL, NULL); + ret += (*t)(world, n_quads); + } + + serd_world_free(world); + return ret; +} diff --git a/test/test_sink.c b/test/test_sink.c new file mode 100644 index 00000000..b1eb8df8 --- /dev/null +++ b/test/test_sink.c @@ -0,0 +1,151 @@ +/* + Copyright 2019-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#undef NDEBUG + +#include "serd/serd.h" + +#include <assert.h> +#include <stddef.h> + +#define NS_EG "http://example.org/" + +typedef struct { + const SerdNode* last_base; + const SerdNode* last_name; + const SerdNode* last_namespace; + const SerdNode* last_end; + const SerdStatement* last_statement; + SerdStatus return_status; +} State; + +static SerdStatus +on_base(void* handle, const SerdNode* uri) +{ + State* state = (State*)handle; + + state->last_base = uri; + return state->return_status; +} + +static SerdStatus +on_prefix(void* handle, const SerdNode* name, const SerdNode* uri) +{ + State* state = (State*)handle; + + state->last_name = name; + state->last_namespace = uri; + return state->return_status; +} + +static SerdStatus +on_statement(void* handle, + SerdStatementFlags flags, + const SerdStatement* statement) +{ + (void)flags; + + State* state = (State*)handle; + + state->last_statement = statement; + return state->return_status; +} + +static SerdStatus +on_end(void* handle, const SerdNode* node) +{ + State* state = (State*)handle; + + state->last_end = node; + return state->return_status; +} + +static SerdStatus +on_event(void* handle, const SerdEvent* event) +{ + switch (event->type) { + case SERD_BASE: + return on_base(handle, event->base.uri); + case SERD_PREFIX: + return on_prefix(handle, event->prefix.name, event->prefix.uri); + case SERD_STATEMENT: + return on_statement( + handle, event->statement.flags, event->statement.statement); + case SERD_END: + return on_end(handle, event->end.node); + } + + return SERD_SUCCESS; +} + +int +main(void) +{ + SerdNodes* const nodes = serd_nodes_new(); + + const SerdNode* base = + serd_nodes_manage(nodes, serd_new_uri(SERD_STATIC_STRING(NS_EG))); + + const SerdNode* name = + serd_nodes_manage(nodes, serd_new_string(SERD_STATIC_STRING("eg"))); + + const SerdNode* uri = + serd_nodes_manage(nodes, serd_new_uri(SERD_STATIC_STRING(NS_EG "uri"))); + + const SerdNode* blank = + serd_nodes_manage(nodes, serd_new_blank(SERD_STATIC_STRING("b1"))); + + SerdEnv* env = serd_env_new(serd_node_string_view(base)); + + SerdStatement* const statement = + serd_statement_new(base, uri, blank, NULL, NULL); + + State state = {0, 0, 0, 0, 0, SERD_SUCCESS}; + + // Call functions on a sink with no functions set + + SerdSink* null_sink = serd_sink_new(&state, NULL, NULL); + assert(!serd_sink_write_base(null_sink, base)); + assert(!serd_sink_write_prefix(null_sink, name, uri)); + assert(!serd_sink_write_statement(null_sink, 0, statement)); + assert(!serd_sink_write(null_sink, 0, base, uri, blank, NULL)); + assert(!serd_sink_write_end(null_sink, blank)); + serd_sink_free(null_sink); + + // Try again with a sink that has the event handler set + + SerdSink* sink = serd_sink_new(&state, on_event, NULL); + + assert(!serd_sink_write_base(sink, base)); + assert(serd_node_equals(state.last_base, base)); + + assert(!serd_sink_write_prefix(sink, name, uri)); + assert(serd_node_equals(state.last_name, name)); + assert(serd_node_equals(state.last_namespace, uri)); + + assert(!serd_sink_write_statement(sink, 0, statement)); + assert(serd_statement_equals(state.last_statement, statement)); + + assert(!serd_sink_write_end(sink, blank)); + assert(serd_node_equals(state.last_end, blank)); + + serd_sink_free(sink); + serd_statement_free(statement); + serd_env_free(env); + serd_nodes_free(nodes); + + return 0; +} diff --git a/test/test_statement.c b/test/test_statement.c new file mode 100644 index 00000000..8e1de1a2 --- /dev/null +++ b/test/test_statement.c @@ -0,0 +1,91 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#undef NDEBUG + +#include "serd/serd.h" + +#include <assert.h> +#include <stddef.h> + +#define NS_EG "http://example.org/" + +int +main(void) +{ + SerdNodes* const nodes = serd_nodes_new(); + + const SerdNode* const f = + serd_nodes_manage(nodes, serd_new_string(SERD_STATIC_STRING("file"))); + + const SerdNode* const s = + serd_nodes_manage(nodes, serd_new_uri(SERD_STATIC_STRING(NS_EG "s"))); + + const SerdNode* const p = + + serd_nodes_manage(nodes, serd_new_uri(SERD_STATIC_STRING(NS_EG "p"))); + + const SerdNode* const o = + serd_nodes_manage(nodes, serd_new_uri(SERD_STATIC_STRING(NS_EG "o"))); + + const SerdNode* const g = + serd_nodes_manage(nodes, serd_new_uri(SERD_STATIC_STRING(NS_EG "g"))); + + assert(!serd_statement_copy(NULL)); + + SerdCursor* const cursor = serd_cursor_new(f, 1, 1); + SerdStatement* const statement = serd_statement_new(s, p, o, g, cursor); + assert(serd_statement_equals(statement, statement)); + assert(!serd_statement_equals(statement, NULL)); + assert(!serd_statement_equals(NULL, statement)); + assert(serd_statement_subject(statement) == s); + assert(serd_statement_predicate(statement) == p); + assert(serd_statement_object(statement) == o); + assert(serd_statement_graph(statement) == g); + assert(serd_statement_cursor(statement) != cursor); + assert(serd_cursor_equals(serd_statement_cursor(statement), cursor)); + assert(serd_statement_matches(statement, s, p, o, g)); + assert(serd_statement_matches(statement, NULL, p, o, g)); + assert(serd_statement_matches(statement, s, NULL, o, g)); + assert(serd_statement_matches(statement, s, p, NULL, g)); + assert(serd_statement_matches(statement, s, p, o, NULL)); + assert(!serd_statement_matches(statement, o, NULL, NULL, NULL)); + assert(!serd_statement_matches(statement, NULL, o, NULL, NULL)); + assert(!serd_statement_matches(statement, NULL, NULL, s, NULL)); + assert(!serd_statement_matches(statement, NULL, NULL, NULL, s)); + + SerdStatement* const diff_s = serd_statement_new(o, p, o, g, cursor); + assert(!serd_statement_equals(statement, diff_s)); + serd_statement_free(diff_s); + + SerdStatement* const diff_p = serd_statement_new(s, o, o, g, cursor); + assert(!serd_statement_equals(statement, diff_p)); + serd_statement_free(diff_p); + + SerdStatement* const diff_o = serd_statement_new(s, p, s, g, cursor); + assert(!serd_statement_equals(statement, diff_o)); + serd_statement_free(diff_o); + + SerdStatement* const diff_g = serd_statement_new(s, p, o, s, cursor); + assert(!serd_statement_equals(statement, diff_g)); + serd_statement_free(diff_g); + + serd_statement_free(statement); + serd_cursor_free(cursor); + serd_nodes_free(nodes); + + return 0; +} |