From e9b773bf452f0df0ea69e363bcb5899fea124b28 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sun, 11 Apr 2021 22:07:39 -0400 Subject: Add model --- NEWS | 1 + doc/conf.py.in | 4 +- doc/serdi.1 | 22 +- include/serd/serd.h | 409 +++++++++++++- meson.build | 7 + src/.clang-tidy | 1 + src/compare.c | 158 ++++++ src/compare.h | 54 ++ src/cursor.c | 224 ++++++++ src/cursor.h | 83 +++ src/describe.c | 266 +++++++++ src/inserter.c | 122 +++++ src/model.c | 728 +++++++++++++++++++++++++ src/model.h | 37 ++ src/reader.c | 2 +- src/serdi.c | 59 +- src/statement.c | 8 +- src/string.c | 4 + test/.clang-tidy | 1 + test/meson.build | 2 + test/run_test_suite.py | 42 ++ test/test_cursor.c | 114 ++++ test/test_free_null.c | 3 + test/test_model.c | 1396 ++++++++++++++++++++++++++++++++++++++++++++++++ test/test_string.c | 2 +- 25 files changed, 3735 insertions(+), 14 deletions(-) create mode 100644 src/compare.c create mode 100644 src/compare.h create mode 100644 src/cursor.c create mode 100644 src/cursor.h create mode 100644 src/describe.c create mode 100644 src/inserter.c create mode 100644 src/model.c create mode 100644 src/model.h create mode 100644 test/test_cursor.c create mode 100644 test/test_model.c diff --git a/NEWS b/NEWS index 580f1346..21dad0a1 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,7 @@ serd (1.0.1) unstable; * Add SerdBuffer for mutable buffers to keep SerdChunk const-correct * Add SerdWorld for shared library state * Add extensible logging API + * Add model for storing statements in memory * Add option for writing terse output without newlines * Add support for parsing variables * Add support for writing terse collections diff --git a/doc/conf.py.in b/doc/conf.py.in index 5fdf1220..059f096f 100644 --- a/doc/conf.py.in +++ b/doc/conf.py.in @@ -14,6 +14,7 @@ pygments_style = "friendly" try: import sphinx_lv2_theme + have_lv2_theme = True except ModuleNotFoundError: have_lv2_theme = False @@ -24,6 +25,7 @@ _opaque = [ "SerdByteSinkImpl", "SerdByteSourceImpl", "SerdCaretImpl", + "SerdCursorImpl", "SerdEnvImpl", "SerdIterImpl", "SerdModelImpl", @@ -55,7 +57,7 @@ html_theme = "sphinx_lv2_theme" if have_lv2_theme: html_theme = "sphinx_lv2_theme" - if tags.has('singlehtml'): + if tags.has("singlehtml"): html_sidebars = { "**": [ "globaltoc.html", diff --git a/doc/serdi.1 b/doc/serdi.1 index 495c3940..e9d70857 100644 --- a/doc/serdi.1 +++ b/doc/serdi.1 @@ -1,12 +1,12 @@ -.Dd March 6, 2021 +.Dd April 14, 2021 .Dt SERDI 1 .Os Serd 0.30.11 .Sh NAME .Nm serdi -.Nd read and write RDF syntax +.Nd read, transform, and write RDF data .Sh SYNOPSIS .Nm serdi -.Op Fl abefhlqtvx +.Op Fl abefhlmqtvx .Op Fl I Ar base .Op Fl c Ar prefix .Op Fl i Ar syntax @@ -19,8 +19,8 @@ .Ar input ... .Sh DESCRIPTION .Nm -is a fast command-line utility for streaming and processing RDF data. -It reads one or more RDF documents and writes the data again, +is a fast command-line utility for processing RDF data. +It reads one or more documents and writes the data again, possibly transformed and/or in a different syntax. By default, the input syntax is guessed from the file extension, @@ -64,7 +64,11 @@ This is useful when reading from a pipe since output will be generated immediate With this option serdi uses one page less memory, but will likely be significantly slower. .Pp .It Fl f -Keep full URIs in input (don't qualify with namespace prefixes or make URIs relative). +Fast and loose mode. +This disables shortening URIs into prefixed names or relative URI references. +If the model is enabled, then this writes the model quickly in sorted order. +Note that doing so with TriG or Turtle may make the output ugly, +since blank nodes will not be inlined. .Pp .It Fl h Print the command line options. @@ -92,6 +96,12 @@ Lax (non-strict) parsing. If this is enabled, recoverable syntax errors will print a warning, but parsing will proceed starting at the next statement if possible. Note that data may be lost when using this option. .Pp +.It Fl m +Build a model in memory. +This loads all of the input into memory before writing the output. +This will reorder statements and eliminate duplicates, at the cost of performance and memory consumption. +When writing TriG or Turtle, this may enable better pretty-printing with more inline descriptions. +.Pp .It Fl o Ar syntax Write output as .Ar syntax . diff --git a/include/serd/serd.h b/include/serd/serd.h index 77986150..efa48543 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -205,6 +205,7 @@ typedef enum { SERD_ERR_UNKNOWN, ///< Unknown error SERD_ERR_BAD_SYNTAX, ///< Invalid syntax SERD_ERR_BAD_ARG, ///< Invalid argument + SERD_ERR_BAD_CURSOR, ///< Use of invalidated cursor SERD_ERR_NOT_FOUND, ///< Not found SERD_ERR_ID_CLASH, ///< Encountered clashing blank node IDs SERD_ERR_BAD_CURIE, ///< Invalid CURIE or unknown namespace prefix @@ -215,6 +216,7 @@ typedef enum { SERD_ERR_NO_DATA, ///< Unexpected end of input SERD_ERR_BAD_CALL, ///< Invalid call SERD_ERR_BAD_URI, ///< Invalid or unresolved URI + SERD_ERR_BAD_INDEX, ///< No optimal model index available } SerdStatus; /** @@ -547,7 +549,7 @@ serd_write_file_uri(SerdStringView path, @{ */ -/// A syntactic RDF node +/// An RDF node typedef struct SerdNodeImpl SerdNode; /** @@ -2430,6 +2432,411 @@ SERD_API SerdStatus serd_writer_finish(SerdWriter* SERD_NONNULL writer); +/** + @} + @defgroup serd_cursor Cursor + @{ +*/ + +/** + A cursor that iterates over statements in a model. + + A cursor is a smart iterator that visits all statements that match a + pattern. +*/ +typedef struct SerdCursorImpl SerdCursor; + +/// Return a new copy of `cursor` +SERD_API +SerdCursor* SERD_ALLOCATED +serd_cursor_copy(const SerdCursor* SERD_NULLABLE cursor); + +/// Return the statement pointed to by `cursor` +SERD_API +const SerdStatement* SERD_NULLABLE +serd_cursor_get(const SerdCursor* SERD_NONNULL cursor); + +/** + Increment cursor to point to the next statement. + + @return Failure if `cursor` was already at the end. +*/ +SERD_API +SerdStatus +serd_cursor_advance(SerdCursor* SERD_NONNULL cursor); + +/// Return true if the cursor has reached its end +SERD_PURE_API +bool +serd_cursor_is_end(const SerdCursor* SERD_NULLABLE cursor); + +/** + Return true iff `lhs` equals `rhs`. + + Two cursors are equivalent if they point to the same statement in the same + index in the same model, or are both the end of the same model. Note that + two cursors can point to the same statement but not be equivalent, since + they may have reached the statement via different indices. +*/ +SERD_PURE_API +bool +serd_cursor_equals(const SerdCursor* SERD_NULLABLE lhs, + const SerdCursor* SERD_NULLABLE rhs); + +/// Free `cursor` +SERD_API +void +serd_cursor_free(SerdCursor* SERD_NULLABLE cursor); + +/** + @} + @defgroup serd_range Range + @{ +*/ + +/// Flags that control the style of a model serialisation +typedef enum { + SERD_NO_INLINE_OBJECTS = 1u << 0u ///< Disable object inlining +} SerdDescribeFlag; + +/// Bitwise OR of SerdDescribeFlag values +typedef uint32_t SerdDescribeFlags; + +/** + Describe a range of statements by writing to a sink. + + This will consume the given cursor, and emit at least every statement it + visits. More statements from the model may be written in order to describe + anonymous blank nodes that are associated with a subject in the range. + + The default is to write statements in an order suited for pretty-printing + with Turtle or TriG with as many anonymous nodes as possible. If + `SERD_NO_INLINE_OBJECTS` is given, a simple sorted stream is written + instead, which is faster since no searching is required, but can result in + ugly output for Turtle or Trig. +*/ +SERD_API +SerdStatus +serd_describe_range(const SerdCursor* SERD_NULLABLE range, + const SerdSink* SERD_NONNULL sink, + SerdDescribeFlags flags); + +/** + @} + @defgroup serd_model Model + @{ +*/ + +/// An indexed set of statements +typedef struct SerdModelImpl SerdModel; + +/** + Statement ordering. + + Statements themselves always have the same fields in the same order + (subject, predicate, object, graph), but a model can keep indices for + different orderings to provide good performance for different kinds of + queries. +*/ +typedef enum { + SERD_ORDER_SPO, ///< Subject, Predicate, Object + SERD_ORDER_SOP, ///< Subject, Object, Predicate + SERD_ORDER_OPS, ///< Object, Predicate, Subject + SERD_ORDER_OSP, ///< Object, Subject, Predicate + SERD_ORDER_PSO, ///< Predicate, Subject, Object + SERD_ORDER_POS, ///< Predicate, Object, Subject + SERD_ORDER_GSPO, ///< Graph, Subject, Predicate, Object + SERD_ORDER_GSOP, ///< Graph, Subject, Object, Predicate + SERD_ORDER_GOPS, ///< Graph, Object, Predicate, Subject + SERD_ORDER_GOSP, ///< Graph, Object, Subject, Predicate + SERD_ORDER_GPSO, ///< Graph, Predicate, Subject, Object + SERD_ORDER_GPOS ///< Graph, Predicate, Object, Subject +} SerdStatementOrder; + +/// Flags that control model storage and indexing +typedef enum { + SERD_STORE_GRAPHS = 1u << 0u, ///< Store and index the graph of statements + SERD_STORE_CARETS = 1u << 1u, ///< Store original caret of statements +} SerdModelFlag; + +/// Bitwise OR of SerdModelFlag values +typedef uint32_t SerdModelFlags; + +/** + Create a new model. + + @param world The world in which to make this model. + + @param default_order The order for the default index, which is always + present and responsible for owning all the statements in the model. This + should almost always be #SERD_ORDER_SPO or #SERD_ORDER_GSPO (which + support writing pretty documents), but advanced applications that do not want + either of these indices can use a different order. Additional indices can + be added with serd_model_add_index(). + + @param flags Options that control what data is stored in the model. +*/ +SERD_API +SerdModel* SERD_ALLOCATED +serd_model_new(SerdWorld* SERD_NONNULL world, + SerdStatementOrder default_order, + SerdModelFlags flags); + +/// Return a deep copy of `model` +SERD_API +SerdModel* SERD_ALLOCATED +serd_model_copy(const SerdModel* SERD_NONNULL model); + +/// Return true iff `a` is equal to `b`, ignoring statement cursor metadata +SERD_API +bool +serd_model_equals(const SerdModel* SERD_NULLABLE a, + const SerdModel* SERD_NULLABLE b); + +/// Close and free `model` +SERD_API +void +serd_model_free(SerdModel* SERD_NULLABLE model); + +/** + Add an index for a particular statement order to the model. + + @return Failure if this index already exists. +*/ +SERD_API +SerdStatus +serd_model_add_index(SerdModel* SERD_NONNULL model, SerdStatementOrder order); + +/** + Add an index for a particular statement order to the model. + + @return Failure if this index does not exist. +*/ +SERD_API +SerdStatus +serd_model_drop_index(SerdModel* SERD_NONNULL model, SerdStatementOrder order); + +/// Get the world associated with `model` +SERD_PURE_API +SerdWorld* SERD_NONNULL +serd_model_world(SerdModel* SERD_NONNULL model); + +/// Get all nodes interned in `model` +SERD_PURE_API +const SerdNodes* SERD_NONNULL +serd_model_nodes(const SerdModel* SERD_NONNULL model); + +/// Get the default statement order of `model` +SERD_PURE_API +SerdStatementOrder +serd_model_default_order(const SerdModel* SERD_NONNULL model); + +/// Get the flags enabled on `model` +SERD_PURE_API +SerdModelFlags +serd_model_flags(const SerdModel* SERD_NONNULL model); + +/// Return the number of statements stored in `model` +SERD_PURE_API +size_t +serd_model_size(const SerdModel* SERD_NONNULL model); + +/// Return true iff there are no statements stored in `model` +SERD_PURE_API +bool +serd_model_empty(const SerdModel* SERD_NONNULL model); + +/** + Return a cursor at the start of every statement in the model. + + The returned cursor will advance over every statement in the model's default + order. +*/ +SERD_API +SerdCursor* SERD_ALLOCATED +serd_model_begin(const SerdModel* SERD_NONNULL model); + +/** + Return a cursor past the end of the model. + + This returns the "universal" end cursor, which is equivalent to any cursor + for this model that has reached its end. +*/ +SERD_CONST_API +const SerdCursor* SERD_NONNULL +serd_model_end(const SerdModel* SERD_NONNULL model); + +/// Return a cursor over all statements in the model in a specific order +SERD_API +SerdCursor* SERD_ALLOCATED +serd_model_begin_ordered(const SerdModel* SERD_NONNULL model, + SerdStatementOrder order); + +/** + Search for statements that match a pattern. + + @return An iterator to the first match, or NULL if no matches found. +*/ +SERD_API +SerdCursor* SERD_ALLOCATED +serd_model_find(const SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NULLABLE s, + const SerdNode* SERD_NULLABLE p, + const SerdNode* SERD_NULLABLE o, + const SerdNode* SERD_NULLABLE g); + +/** + Search for a single node that matches a pattern. + + Exactly one of `s`, `p`, `o` must be NULL. + This function is mainly useful for predicates that only have one value. + + @return The first matching node, or NULL if no matches are found. +*/ +SERD_API +const SerdNode* SERD_NULLABLE +serd_model_get(const SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NULLABLE s, + const SerdNode* SERD_NULLABLE p, + const SerdNode* SERD_NULLABLE o, + const SerdNode* SERD_NULLABLE g); + +/** + Search for a single statement that matches a pattern. + + This function is mainly useful for predicates that only have one value. + + @return The first matching statement, or NULL if none are found. +*/ +SERD_API +const SerdStatement* SERD_NULLABLE +serd_model_get_statement(const SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NULLABLE s, + const SerdNode* SERD_NULLABLE p, + const SerdNode* SERD_NULLABLE o, + const SerdNode* SERD_NULLABLE g); + +/// Return true iff a statement exists +SERD_API +bool +serd_model_ask(const SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NULLABLE s, + const SerdNode* SERD_NULLABLE p, + const SerdNode* SERD_NULLABLE o, + const SerdNode* SERD_NULLABLE g); + +/// Return the number of matching statements +SERD_API +size_t +serd_model_count(const SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NULLABLE s, + const SerdNode* SERD_NULLABLE p, + const SerdNode* SERD_NULLABLE o, + const SerdNode* SERD_NULLABLE g); + +/** + Add a statement to a model from nodes. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_add(SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NONNULL s, + const SerdNode* SERD_NONNULL p, + const SerdNode* SERD_NONNULL o, + const SerdNode* SERD_NULLABLE g); + +/** + Add a statement to a model from nodes with a caret + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_add_with_caret(SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NONNULL s, + const SerdNode* SERD_NONNULL p, + const SerdNode* SERD_NONNULL o, + const SerdNode* SERD_NULLABLE g, + const SerdCaret* SERD_NULLABLE caret); + +/** + Add a statement to a model. + + This function fails if there are any active iterators on `model`. + If statement is null, then SERD_FAILURE is returned. +*/ +SERD_API +SerdStatus +serd_model_insert(SerdModel* SERD_NONNULL model, + const SerdStatement* SERD_NONNULL statement); + +/** + Add a range of statements to a model. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API +SerdStatus +serd_model_insert_statements(SerdModel* SERD_NONNULL model, + SerdCursor* SERD_NONNULL range); + +/** + Remove a statement from a model via an iterator. + + Calling this function invalidates all other iterators on this model. + + @param model The model which `iter` points to. + + @param cursor Cursor pointing to the element to erase. This cursor is + advanced to the next statement on return. +*/ +SERD_API +SerdStatus +serd_model_erase(SerdModel* SERD_NONNULL model, + SerdCursor* SERD_NONNULL cursor); + +/** + Remove a range of statements from a model. + + This can be used with serd_model_find() to erase all statements in a model + that match a pattern. + + Calling this function invalidates all iterators on `model`. + + @param model The model which `range` points to. + + @param range Range to erase, which will be empty on return. +*/ +SERD_API +SerdStatus +serd_model_erase_statements(SerdModel* SERD_NONNULL model, + SerdCursor* SERD_NONNULL range); + +/** + Remove everything from a model. + + Calling this function invalidates all iterators on `model`. + + @param model The model to clear. +*/ +SERD_API +SerdStatus +serd_model_clear(SerdModel* SERD_NONNULL model); + +/** + @} + @defgroup serd_inserter Inserter + @{ +*/ + +/// Create an inserter for writing statements to a model +SERD_API +SerdSink* SERD_ALLOCATED +serd_inserter_new(SerdModel* SERD_NONNULL model, + const SerdNode* SERD_NULLABLE default_graph); + /** @} @} diff --git a/meson.build b/meson.build index 6711132e..5197f448 100644 --- a/meson.build +++ b/meson.build @@ -41,6 +41,7 @@ if get_option('strict') '-Wno-format-nonliteral', '-Wno-inline', '-Wno-padded', + '-Wno-strict-overflow', '-Wno-switch-default', '-Wno-unsuffixed-float-constants', '-Wno-unused-const-variable', @@ -86,8 +87,13 @@ sources = [ 'src/byte_sink.c', 'src/byte_source.c', 'src/caret.c', + 'src/compare.c', + 'src/cursor.c', + 'src/describe.c', 'src/env.c', + 'src/inserter.c', 'src/log.c', + 'src/model.c', 'src/n3.c', 'src/node.c', 'src/node_syntax.c', @@ -105,6 +111,7 @@ sources = [ 'src/uri.c', 'src/world.c', 'src/writer.c', + 'src/zix/btree.c', 'src/zix/digest.c', 'src/zix/hash.c', ] diff --git a/src/.clang-tidy b/src/.clang-tidy index a9e25a2f..6029eeaa 100644 --- a/src/.clang-tidy +++ b/src/.clang-tidy @@ -17,6 +17,7 @@ Checks: > -llvmlibc-*, -misc-no-recursion, -readability-function-cognitive-complexity, + -readability-suspicious-call-argument, WarningsAsErrors: '*' HeaderFilterRegex: '.*' FormatStyle: file diff --git a/src/compare.c b/src/compare.c new file mode 100644 index 00000000..82ff998f --- /dev/null +++ b/src/compare.c @@ -0,0 +1,158 @@ +/* + Copyright 2011-2021 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "compare.h" + +#include "statement.h" + +#include "serd/serd.h" + +#include + +/// Compare a mandatory node with a node pattern +static inline int +serd_node_wildcard_compare(const SerdNode* SERD_NONNULL a, + const SerdNode* SERD_NULLABLE b) +{ + assert(a); + return b ? serd_node_compare(a, b) : 0; +} + +/// Compare an optional graph node with a node pattern +static inline int +serd_node_graph_compare(const SerdNode* SERD_NULLABLE a, + const SerdNode* SERD_NULLABLE b) +{ + if (a == b) { + return 0; + } + + if (!a) { + return -1; + } + + if (!b) { + return 1; + } + + return serd_node_compare(a, b); +} + +/// Compare statements lexicographically, ignoring graph +int +serd_triple_compare(const void* const x, + const void* const y, + const void* const user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (unsigned i = 0u; i < 3u; ++i) { + const int o = ordering[i]; + assert(o < 3); + + const int c = serd_node_compare(s->nodes[o], t->nodes[o]); + if (c) { + return c; + } + } + + return 0; +} + +/** + Compare statments with statement patterns lexicographically, ignoring graph. + + Null nodes in the second argument are treated as wildcards, always less than + any node. +*/ +int +serd_triple_compare_pattern(const void* const x, + const void* const y, + const void* const user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (unsigned i = 0u; i < 3u; ++i) { + const int o = ordering[i]; + assert(o < 3); + + const int c = serd_node_wildcard_compare(s->nodes[o], t->nodes[o]); + if (c) { + return c; + } + } + + return 0; +} + +/// Compare statements lexicographically +int +serd_quad_compare(const void* const x, + const void* const y, + const void* const user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (unsigned i = 0u; i < 4u; ++i) { + const int o = ordering[i]; + const int c = + (o == SERD_GRAPH) + ? serd_node_graph_compare(s->nodes[SERD_GRAPH], t->nodes[SERD_GRAPH]) + : serd_node_compare(s->nodes[o], t->nodes[o]); + + if (c) { + return c; + } + } + + return 0; +} + +/** + Compare statments with statement patterns lexicographically. + + Null nodes in the second argument are treated as wildcards, always less than + any node. +*/ +int +serd_quad_compare_pattern(const void* const x, + const void* const y, + const void* const user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (unsigned i = 0u; i < 4u; ++i) { + const int o = ordering[i]; + const int c = + (o == SERD_GRAPH) + ? serd_node_graph_compare(s->nodes[SERD_GRAPH], t->nodes[SERD_GRAPH]) + : serd_node_wildcard_compare(s->nodes[o], t->nodes[o]); + + if (c) { + return c; + } + } + + return 0; +} diff --git a/src/compare.h b/src/compare.h new file mode 100644 index 00000000..eabbf43e --- /dev/null +++ b/src/compare.h @@ -0,0 +1,54 @@ +/* + Copyright 2011-2021 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_COMPARE_H +#define SERD_COMPARE_H + +#include "serd/serd.h" + +/// Compare statements lexicographically, ignoring graph +SERD_PURE_FUNC +int +serd_triple_compare(const void* x, const void* y, const void* user_data); + +/** + Compare statments with statement patterns lexicographically, ignoring graph. + + Null nodes in the second argument are treated as wildcards, always less than + any node. +*/ +SERD_PURE_FUNC +int +serd_triple_compare_pattern(const void* x, + const void* y, + const void* user_data); + +/// Compare statements lexicographically +SERD_PURE_FUNC +int +serd_quad_compare(const void* x, const void* y, const void* user_data); + +/** + Compare statments with statement patterns lexicographically. + + Null nodes in the second argument are treated as wildcards, always less than + any node. +*/ +SERD_PURE_FUNC +int +serd_quad_compare_pattern(const void* x, const void* y, const void* user_data); + +#endif // SERD_COMPARE_H diff --git a/src/cursor.c b/src/cursor.c new file mode 100644 index 00000000..d57da866 --- /dev/null +++ b/src/cursor.c @@ -0,0 +1,224 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "cursor.h" + +#include "model.h" +#include "node.h" + +#include "serd/serd.h" +#include "zix/btree.h" +#include "zix/common.h" + +#include +#include +#include +#include + +static inline bool +statement_matches_quad(const SerdStatement* const statement, + const SerdQuad quad) +{ + return serd_statement_matches(statement, quad[0], quad[1], quad[2], quad[3]); +} + +SERD_PURE_FUNC +bool +serd_iter_in_range(const ZixBTreeIter iter, + const SerdQuad pattern, + const ScanStrategy strategy) +{ + const SerdStatement* const statement = + (const SerdStatement*)zix_btree_get(iter); + + for (unsigned i = 0u; i < strategy.n_prefix; ++i) { + const unsigned field = orderings[strategy.order][i]; + if (!serd_node_pattern_match(statement->nodes[field], pattern[field])) { + return false; + } + } + + return true; +} + +static bool +serd_cursor_in_range(const SerdCursor* const cursor) +{ + return (cursor->strategy.mode == FILTER_EVERYTHING || + serd_iter_in_range(cursor->iter, cursor->pattern, cursor->strategy)); +} + +/// Seek forward as necessary until the cursor points to a matching statement +static SerdStatus +serd_cursor_seek_match(SerdCursor* const cursor) +{ + assert(cursor->strategy.mode == FILTER_EVERYTHING || + cursor->strategy.mode == FILTER_RANGE); + + for (; !zix_btree_iter_is_end(cursor->iter); + zix_btree_iter_increment(&cursor->iter)) { + if (!serd_cursor_in_range(cursor)) { + // Went past the end of the matching range, reset to end + cursor->iter = zix_btree_end_iter; + return SERD_FAILURE; + } + + const SerdStatement* s = (const SerdStatement*)zix_btree_get(cursor->iter); + if (statement_matches_quad(s, cursor->pattern)) { + break; // Found matching statement + } + } + + return SERD_SUCCESS; +} + +static bool +check_version(const SerdCursor* const cursor) +{ + if (cursor->version != cursor->model->version) { + serd_logf(cursor->model->world, + SERD_LOG_LEVEL_ERROR, + "attempt to use invalidated cursor"); + return false; + } + + return true; +} + +SerdCursor +serd_cursor_make(const SerdModel* const model, + const ZixBTreeIter iter, + const SerdQuad pattern, + const ScanStrategy strategy) +{ + SerdCursor cursor = {model, + {pattern[0], pattern[1], pattern[2], pattern[3]}, + model->version, + iter, + strategy}; + + if (strategy.mode == FILTER_RANGE || strategy.mode == FILTER_EVERYTHING) { + serd_cursor_seek_match(&cursor); + } + +#ifndef NDEBUG + if (!zix_btree_iter_is_end(cursor.iter)) { + // Check that the cursor is at a matching statement + const SerdStatement* first = + (const SerdStatement*)zix_btree_get(cursor.iter); + assert(statement_matches_quad(first, pattern)); + + // Check that any nodes in the pattern are interned + for (unsigned i = 0u; i < 3; ++i) { + assert(!cursor.pattern[i] || cursor.pattern[i] == first->nodes[i]); + } + } +#endif + + return cursor; +} + +SerdCursor* +serd_cursor_copy(const SerdCursor* const cursor) +{ + if (!cursor) { + return NULL; + } + + SerdCursor* const copy = (SerdCursor* const)malloc(sizeof(SerdCursor)); + memcpy(copy, cursor, sizeof(SerdCursor)); + return copy; +} + +const SerdStatement* +serd_cursor_get(const SerdCursor* const cursor) +{ + return ((!zix_btree_iter_is_end(cursor->iter) && check_version(cursor)) + ? (const SerdStatement*)zix_btree_get(cursor->iter) + : NULL); +} + +SerdStatus +serd_cursor_scan_next(SerdCursor* const cursor) +{ + if (zix_btree_iter_is_end(cursor->iter)) { + return SERD_FAILURE; + } + + switch (cursor->strategy.mode) { + case SCAN_EVERYTHING: + break; + + case SCAN_RANGE: + if (!serd_cursor_in_range(cursor)) { + // Went past the end of the matching range + cursor->iter = zix_btree_end_iter; + return SERD_FAILURE; + } + break; + + case FILTER_EVERYTHING: + case FILTER_RANGE: + // Seek forward to next match + return serd_cursor_seek_match(cursor); + } + + return SERD_SUCCESS; +} + +SerdStatus +serd_cursor_advance(SerdCursor* const cursor) +{ + if (zix_btree_iter_is_end(cursor->iter) || !check_version(cursor)) { + return SERD_ERR_BAD_CURSOR; + } + + const ZixStatus zst = zix_btree_iter_increment(&cursor->iter); + if (zst) { + assert(zst == ZIX_STATUS_REACHED_END); + return SERD_FAILURE; + } + + return serd_cursor_scan_next(cursor); +} + +bool +serd_cursor_is_end(const SerdCursor* const cursor) +{ + return !cursor || zix_btree_iter_is_end(cursor->iter); +} + +bool +serd_cursor_equals(const SerdCursor* const lhs, const SerdCursor* const rhs) +{ + if (serd_cursor_is_end(lhs) || serd_cursor_is_end(rhs)) { + return serd_cursor_is_end(lhs) && serd_cursor_is_end(rhs); + } + + /* We don't bother checking if the patterns match each other here, or if both + cursors have the same ordering, since both of these must be true if the + BTree iterators are equal. */ + + return (zix_btree_iter_equals(lhs->iter, rhs->iter) && + lhs->strategy.mode == rhs->strategy.mode && + lhs->strategy.n_prefix == rhs->strategy.n_prefix); +} + +void +serd_cursor_free(SerdCursor* const cursor) +{ + free(cursor); +} diff --git a/src/cursor.h b/src/cursor.h new file mode 100644 index 00000000..0a786866 --- /dev/null +++ b/src/cursor.h @@ -0,0 +1,83 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_CURSOR_H +#define SERD_CURSOR_H + +#include "statement.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include +#include + +#define N_STATEMENT_ORDERS 12u + +/// An iteration mode that determines what to skip and when to end +typedef enum { + SCAN_EVERYTHING, ///< Iterate over entire store + SCAN_RANGE, ///< Iterate over range with equal prefix + FILTER_EVERYTHING, ///< Iterate to end of store, filtering + FILTER_RANGE, ///< Iterate over range with equal prefix, filtering +} ScanMode; + +/// A strategy for searching and iterating over a statement index +typedef struct { + ScanMode mode; ///< Iteration mode + unsigned n_prefix; ///< Number of prefix nodes that match the index + SerdStatementOrder order; ///< Order of index to scan +} ScanStrategy; + +struct SerdCursorImpl { + const SerdModel* model; ///< Model being iterated over + const SerdNode* pattern[4]; ///< Search pattern (nodes in model or null) + size_t version; ///< Model version when iterator was created + ZixBTreeIter iter; ///< Current position in index + ScanStrategy strategy; ///< Index scanning strategy +}; + +/// Lookup table of ordered indices for each SerdStatementOrder +static const unsigned orderings[N_STATEMENT_ORDERS][4] = { + {0u, 1u, 2u, 3u}, // SPOG + {0u, 2u, 1u, 3u}, // SOPG + {2u, 1u, 0u, 3u}, // OPSG + {2u, 0u, 1u, 3u}, // OPSG + {1u, 0u, 2u, 3u}, // PSOG + {1u, 2u, 0u, 3u}, // PSOG + {3u, 0u, 1u, 2u}, // GSPO + {3u, 0u, 2u, 1u}, // GSPO + {3u, 2u, 1u, 0u}, // GOPS + {3u, 2u, 0u, 1u}, // GOPS + {3u, 1u, 0u, 2u}, // GPSO + {3u, 1u, 2u, 0u} // GPSO +}; + +SerdCursor +serd_cursor_make(const SerdModel* model, + ZixBTreeIter iter, + const SerdQuad pattern, + ScanStrategy strategy); + +SerdStatus +serd_cursor_scan_next(SerdCursor* cursor); + +bool +serd_iter_in_range(ZixBTreeIter iter, + const SerdQuad pattern, + ScanStrategy strategy); + +#endif // SERD_CURSOR_H diff --git a/src/describe.c b/src/describe.c new file mode 100644 index 00000000..5ddbf7d0 --- /dev/null +++ b/src/describe.c @@ -0,0 +1,266 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "cursor.h" +#include "model.h" +#include "world.h" + +// Define the types used in the hash interface for more type safety +#define ZIX_HASH_KEY_TYPE const SerdNode +#define ZIX_HASH_RECORD_TYPE const SerdNode + +#include "serd/serd.h" +#include "zix/common.h" +#include "zix/digest.h" +#include "zix/hash.h" + +#include +#include +#include + +typedef enum { NAMED, ANON_S, ANON_O, LIST_S, LIST_O } NodeStyle; + +static SerdStatus +write_range_statement(const SerdSink* sink, + const SerdModel* model, + ZixHash* list_subjects, + unsigned depth, + SerdStatementFlags flags, + const SerdStatement* statement); + +static NodeStyle +get_node_style(const SerdModel* const model, const SerdNode* const node) +{ + if (serd_node_type(node) != SERD_BLANK) { + return NAMED; // Non-blank node can't be anonymous + } + + const size_t n_as_object = serd_model_count(model, NULL, NULL, node, NULL); + if (n_as_object > 1) { + return NAMED; // Blank node referred to several times + } + + if (serd_model_count(model, node, model->world->rdf_first, NULL, NULL) == 1 && + serd_model_count(model, node, model->world->rdf_rest, NULL, NULL) == 1 && + !serd_model_ask(model, NULL, model->world->rdf_rest, node, NULL)) { + return n_as_object == 0 ? LIST_S : LIST_O; + } + + return n_as_object == 0 ? ANON_S : ANON_O; +} + +static const SerdNode* +identity(const SerdNode* const record) +{ + return record; +} + +static ZixHashCode +ptr_hash(const SerdNode* const ptr) +{ + return zix_digest(0u, &ptr, sizeof(SerdNode*)); +} + +static bool +ptr_equals(const SerdNode* const a, const SerdNode* const b) +{ + return *(const void* const*)a == *(const void* const*)b; +} + +static SerdStatus +write_pretty_range(const SerdSink* const sink, + const unsigned depth, + const SerdModel* const model, + SerdCursor* const range) +{ + ZixHash* const list_subjects = zix_hash_new(identity, ptr_hash, ptr_equals); + SerdStatus st = SERD_SUCCESS; + + while (!st && !serd_cursor_is_end(range)) { + const SerdStatement* const statement = serd_cursor_get(range); + assert(statement); + + if (!(st = write_range_statement( + sink, model, list_subjects, depth, 0, statement))) { + st = serd_cursor_advance(range); + } + } + + zix_hash_free(list_subjects); + + return st > SERD_FAILURE ? st : SERD_SUCCESS; +} + +static SerdStatus +write_list(const SerdSink* const sink, + const SerdModel* const model, + ZixHash* const list_subjects, + const unsigned depth, + SerdStatementFlags flags, + const SerdNode* object, + const SerdNode* const graph) +{ + const SerdWorld* const world = model->world; + const SerdNode* const rdf_first = world->rdf_first; + const SerdNode* const rdf_rest = world->rdf_rest; + const SerdNode* const rdf_nil = world->rdf_nil; + SerdStatus st = SERD_SUCCESS; + + const SerdStatement* fs = + serd_model_get_statement(model, object, rdf_first, NULL, graph); + + assert(fs); // Shouldn't get here if it doesn't at least have an rdf:first + + while (!st && !serd_node_equals(object, rdf_nil)) { + // Write rdf:first statement for this node + if ((st = write_range_statement( + sink, model, list_subjects, depth, flags, fs))) { + return st; + } + + // Get rdf:rest statement + const SerdStatement* const rs = + serd_model_get_statement(model, object, rdf_rest, NULL, graph); + + if (!rs) { + // Terminate malformed list with missing rdf:rest + return serd_sink_write(sink, 0, object, rdf_rest, rdf_nil, graph); + } + + // Terminate if the next node has no rdf:first + const SerdNode* const next = serd_statement_object(rs); + if (!(fs = serd_model_get_statement(model, next, rdf_first, NULL, graph))) { + return serd_sink_write(sink, 0, object, rdf_rest, rdf_nil, graph); + } + + // Write rdf:next statement and move to the next node + st = serd_sink_write_statement(sink, 0, rs); + object = next; + flags = 0u; + } + + return st; +} + +static bool +skip_range_statement(const SerdModel* const model, + const SerdStatement* const statement) +{ + const SerdNode* const subject = serd_statement_subject(statement); + const NodeStyle subject_style = get_node_style(model, subject); + const SerdNode* const predicate = serd_statement_predicate(statement); + + if (subject_style == ANON_O || subject_style == LIST_O) { + return true; // Skip subject that will be inlined elsewhere + } + + if (subject_style == LIST_S && + (serd_node_equals(predicate, model->world->rdf_first) || + serd_node_equals(predicate, model->world->rdf_rest))) { + return true; // Skip list statement that write_list will handle + } + + return false; +} + +static SerdStatus +write_range_statement(const SerdSink* const sink, + const SerdModel* const model, + ZixHash* const list_subjects, + const unsigned depth, + SerdStatementFlags flags, + const SerdStatement* SERD_NONNULL statement) +{ + const SerdNode* const subject = serd_statement_subject(statement); + const NodeStyle subject_style = get_node_style(model, subject); + const SerdNode* const object = serd_statement_object(statement); + const NodeStyle object_style = get_node_style(model, object); + const SerdNode* const graph = serd_statement_graph(statement); + SerdStatus st = SERD_SUCCESS; + + if (subject_style == ANON_S) { // Write anonymous subject like "[] p o" + flags |= SERD_EMPTY_S; + } + + if (depth == 0u) { + if (skip_range_statement(model, statement)) { + return SERD_SUCCESS; // Skip subject that will be inlined elsewhere + } + + if (subject_style == LIST_S) { + // First write inline list subject, which this statement will follow + if (zix_hash_insert(list_subjects, subject) != ZIX_STATUS_EXISTS) { + st = write_list( + sink, model, list_subjects, 2, SERD_LIST_S, subject, graph); + } + } + } + + if (st) { + return st; + } + + if (object_style == ANON_O) { // Write anonymous object like "[ ... ]" + SerdCursor* const iter = serd_model_find(model, object, NULL, NULL, NULL); + + flags |= SERD_ANON_O; + if (!(st = serd_sink_write_statement(sink, flags, statement))) { + if (!(st = write_pretty_range(sink, depth + 1, model, iter))) { + st = serd_sink_write_end(sink, object); + } + } + + serd_cursor_free(iter); + + } else if (object_style == LIST_O) { // Write list object like "( ... )" + flags |= SERD_LIST_O; + if (!(st = serd_sink_write_statement(sink, flags, statement))) { + st = write_list(sink, model, list_subjects, depth + 1, 0, object, graph); + } + + } else { + st = serd_sink_write_statement(sink, flags, statement); + } + + return st; +} + +SerdStatus +serd_describe_range(const SerdCursor* const range, + const SerdSink* sink, + const SerdDescribeFlags flags) +{ + SerdStatus st = SERD_SUCCESS; + + if (serd_cursor_is_end(range)) { + return st; + } + + SerdCursor copy = *range; + + if (flags & SERD_NO_INLINE_OBJECTS) { + const SerdStatement* statement = NULL; + while (!st && (statement = serd_cursor_get(©))) { + if (!(st = serd_sink_write_statement(sink, 0, statement))) { + st = serd_cursor_advance(©); + } + } + } else { + st = write_pretty_range(sink, 0, range->model, ©); + } + + return st; +} diff --git a/src/inserter.c b/src/inserter.c new file mode 100644 index 00000000..f5fd6ef1 --- /dev/null +++ b/src/inserter.c @@ -0,0 +1,122 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "model.h" +#include "statement.h" + +#include "serd/serd.h" + +#include +#include + +typedef struct { + SerdModel* model; + const SerdNode* default_graph; +} SerdInserterData; + +static bool +can_insert(SerdWorld* const world, const SerdNode* const node) +{ + if (node) { + switch (serd_node_type(node)) { + case SERD_LITERAL: + return can_insert(world, serd_node_datatype(node)); + + case SERD_URI: + if (!serd_uri_string_has_scheme(serd_node_string(node))) { + serd_logf(world, + SERD_LOG_LEVEL_ERROR, + "attempt to insert relative URI <%s> into model", + serd_node_string(node)); + return false; + } + break; + + case SERD_BLANK: + case SERD_VARIABLE: + break; + } + } + + return true; +} + +static SerdStatus +serd_inserter_on_statement(SerdInserterData* const data, + const SerdStatementFlags flags, + const SerdStatement* const statement) +{ + (void)flags; + + SerdModel* const model = data->model; + SerdWorld* const world = model->world; + + // Check that every node is expanded so it is context-free + for (unsigned i = 0; i < 4; ++i) { + if (!can_insert(world, serd_statement_node(statement, (SerdField)i))) { + return SERD_ERR_BAD_ARG; + } + } + + const SerdNode* const s = + serd_nodes_intern(model->nodes, serd_statement_subject(statement)); + + const SerdNode* const p = + serd_nodes_intern(model->nodes, serd_statement_predicate(statement)); + + const SerdNode* const o = + serd_nodes_intern(model->nodes, serd_statement_object(statement)); + + const SerdNode* const g = serd_nodes_intern( + model->nodes, + serd_statement_graph(statement) ? serd_statement_graph(statement) + : data->default_graph); + + const SerdCaret* const caret = + (data->model->flags & SERD_STORE_CARETS) ? statement->caret : NULL; + + const SerdStatus st = + serd_model_add_with_caret(data->model, s, p, o, g, caret); + + return st > SERD_FAILURE ? st : SERD_SUCCESS; +} + +static SerdStatus +serd_inserter_on_event(SerdInserterData* const data, + const SerdEvent* const event) +{ + if (event->type == SERD_STATEMENT) { + return serd_inserter_on_statement( + data, event->statement.flags, event->statement.statement); + } + + return SERD_SUCCESS; +} + +SerdSink* +serd_inserter_new(SerdModel* const model, const SerdNode* const default_graph) +{ + SerdInserterData* const data = + (SerdInserterData*)calloc(1, sizeof(SerdInserterData)); + + data->model = model; + data->default_graph = serd_node_copy(default_graph); + + SerdSink* const sink = + serd_sink_new(data, (SerdEventFunc)serd_inserter_on_event, free); + + return sink; +} diff --git a/src/model.c b/src/model.c new file mode 100644 index 00000000..0485b509 --- /dev/null +++ b/src/model.c @@ -0,0 +1,728 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "model.h" + +#include "caret.h" +#include "compare.h" +#include "cursor.h" +#include "statement.h" + +#include "zix/btree.h" +#include "zix/common.h" + +#include +#include +#include +#include + +static const SerdQuad everything_pattern = {0, 0, 0, 0}; + +/// A 3-bit signature for a triple pattern used as a table index +typedef enum { + SERD_SIGNATURE_XXX, // 000 (???) + SERD_SIGNATURE_XXO, // 001 (??O) + SERD_SIGNATURE_XPX, // 010 (?P?) + SERD_SIGNATURE_XPO, // 011 (?PO) + SERD_SIGNATURE_SXX, // 100 (S??) + SERD_SIGNATURE_SXO, // 101 (S?O) + SERD_SIGNATURE_SPX, // 110 (SP?) + SERD_SIGNATURE_SPO, // 111 (SPO) +} SerdPatternSignature; + +static ZixComparator +serd_model_index_comparator(const SerdModel* const model, + const SerdStatementOrder order) +{ + return (order < SERD_ORDER_GSPO && !(model->flags & SERD_STORE_GRAPHS)) + ? serd_triple_compare + : serd_quad_compare; +} + +static ZixComparator +serd_model_pattern_comparator(const SerdModel* const model, + const SerdStatementOrder order) +{ + return (order < SERD_ORDER_GSPO && !(model->flags & SERD_STORE_GRAPHS)) + ? serd_triple_compare_pattern + : serd_quad_compare_pattern; +} + +SerdStatus +serd_model_add_index(SerdModel* const model, const SerdStatementOrder order) +{ + if (model->indices[order]) { + return SERD_FAILURE; + } + + const unsigned* const ordering = orderings[order]; + const ZixComparator comparator = serd_model_index_comparator(model, order); + + model->indices[order] = zix_btree_new(comparator, ordering); + + ZixStatus zst = model->indices[order] ? ZIX_STATUS_SUCCESS : ZIX_STATUS_ERROR; + + // Insert statements from the default index + if (order != model->default_order) { + ZixBTree* const default_index = model->indices[model->default_order]; + for (ZixBTreeIter i = zix_btree_begin(default_index); + !zst && !zix_btree_iter_is_end(i); + zix_btree_iter_increment(&i)) { + zst = zix_btree_insert(model->indices[order], zix_btree_get(i)); + } + } + + return zst ? serd_model_drop_index(model, order) : SERD_SUCCESS; +} + +SerdStatus +serd_model_drop_index(SerdModel* const model, const SerdStatementOrder order) +{ + if (!model->indices[order]) { + return SERD_FAILURE; + } + + if (order == model->default_order) { + return SERD_ERR_BAD_CALL; + } + + zix_btree_free(model->indices[order], NULL); + model->indices[order] = NULL; + return SERD_SUCCESS; +} + +SerdModel* +serd_model_new(SerdWorld* const world, + const SerdStatementOrder default_order, + const SerdModelFlags flags) +{ + SerdModel* model = (SerdModel*)calloc(1, sizeof(struct SerdModelImpl)); + + model->world = world; + model->nodes = serd_nodes_new(); + model->default_order = default_order; + model->flags = flags; + + serd_model_add_index(model, default_order); + + const ScanStrategy end_strategy = {SCAN_EVERYTHING, 0u, default_order}; + + model->end = serd_cursor_make(model, + zix_btree_end(model->indices[default_order]), + everything_pattern, + end_strategy); + + return model; +} + +SerdModel* +serd_model_copy(const SerdModel* const model) +{ + SerdModel* copy = + serd_model_new(model->world, model->default_order, model->flags); + + SerdCursor* cursor = serd_model_begin(model); + serd_model_insert_statements(copy, cursor); + serd_cursor_free(cursor); + + assert(serd_model_size(model) == serd_model_size(copy)); + assert(serd_nodes_size(model->nodes) == serd_nodes_size(copy->nodes)); + return copy; +} + +static void +log_bad_index(const SerdModel* const model, + const char* const description, + const SerdStatementOrder index_order, + const bool s, + const bool p, + const bool o, + const bool g) +{ + static const char* const order_strings[] = { + "S P O G", + "S O P G", + "O P S G", + "O S P G", + "P S O G", + "P O S G", + "G S P O", + "G S O P", + "G O P S", + "G O S P", + "G P S O", + "G P O S", + }; + + serd_logf(model->world, + SERD_LOG_LEVEL_WARNING, + "%s index (%s) for (%s %s %s%s) query", + description, + order_strings[index_order], + s ? "S" : "?", + p ? "P" : "?", + o ? "O" : "?", + (model->flags & SERD_STORE_GRAPHS) ? g ? " G" : " ?" : ""); +} + +static SerdCursor +make_begin_cursor(const SerdModel* const model, const SerdStatementOrder order) +{ + if (!model->indices[order]) { + log_bad_index(model, "missing", order, false, false, false, false); + return model->end; + } + + const ScanStrategy strategy = {SCAN_EVERYTHING, 0u, order}; + + return serd_cursor_make(model, + zix_btree_begin(model->indices[order]), + everything_pattern, + strategy); +} + +bool +serd_model_equals(const SerdModel* const a, const SerdModel* const b) +{ + if (!a && !b) { + return true; + } + + if (!a || !b || serd_model_size(a) != serd_model_size(b)) { + return false; + } + + SerdCursor ia = make_begin_cursor(a, a->default_order); + SerdCursor ib = make_begin_cursor(b, b->default_order); + + while (!serd_cursor_is_end(&ia) && !serd_cursor_is_end(&ib)) { + if (!serd_statement_equals(serd_cursor_get(&ia), serd_cursor_get(&ib)) || + serd_cursor_advance(&ia) > SERD_FAILURE || + serd_cursor_advance(&ib) > SERD_FAILURE) { + return false; + } + } + + return serd_cursor_is_end(&ia) && serd_cursor_is_end(&ib); +} + +static void +serd_model_drop_statement(SerdModel* const model, + SerdStatement* const statement) +{ + assert(statement); + + for (unsigned i = 0u; i < 4; ++i) { + if (statement->nodes[i]) { + serd_nodes_deref(model->nodes, statement->nodes[i]); + } + } + + if (statement->caret && statement->caret->file) { + serd_nodes_deref(model->nodes, statement->caret->file); + } + + serd_statement_free(statement); +} + +void +serd_model_free(SerdModel* const model) +{ + if (!model) { + return; + } + + // Free all statements (which are owned by the default index) + ZixBTree* const default_index = model->indices[model->default_order]; + zix_btree_clear(default_index, (ZixDestroyFunc)serd_statement_free); + + // Free indices themselves + for (unsigned i = 0u; i < N_STATEMENT_ORDERS; ++i) { + zix_btree_free(model->indices[i], NULL); + } + + serd_nodes_free(model->nodes); + free(model); +} + +SerdWorld* +serd_model_world(SerdModel* const model) +{ + return model->world; +} + +const SerdNodes* +serd_model_nodes(const SerdModel* const model) +{ + return model->nodes; +} + +SerdStatementOrder +serd_model_default_order(const SerdModel* const model) +{ + return model->default_order; +} + +SerdModelFlags +serd_model_flags(const SerdModel* const model) +{ + return model->flags; +} + +size_t +serd_model_size(const SerdModel* const model) +{ + return zix_btree_size(model->indices[model->default_order]); +} + +bool +serd_model_empty(const SerdModel* const model) +{ + return serd_model_size(model) == 0; +} + +SerdCursor* +serd_model_begin_ordered(const SerdModel* const model, + const SerdStatementOrder order) +{ + const SerdCursor cursor = make_begin_cursor(model, order); + + return serd_cursor_copy(&cursor); +} + +SerdCursor* +serd_model_begin(const SerdModel* const model) +{ + return serd_model_begin_ordered(model, model->default_order); +} + +const SerdCursor* +serd_model_end(const SerdModel* const model) +{ + return &model->end; +} + +static SerdStatementOrder +simple_order(const SerdStatementOrder order) +{ + return order < SERD_ORDER_GSPO ? order : (SerdStatementOrder)(order - 6u); +} + +/// Return the best index scanning strategy for a pattern with given nodes +static ScanStrategy +serd_model_strategy(const SerdModel* const model, + const bool with_s, + const bool with_p, + const bool with_o, + const bool with_g) +{ +#define N_STRATEGY_TIERS 4u + + const SerdStatementOrder default_order = simple_order(model->default_order); + const ScanStrategy none = {SCAN_EVERYTHING, 0u, default_order}; + + const ScanStrategy strategies[N_STRATEGY_TIERS][8] = { + // Preferred perfect strategies: SPO, SOP, OPS, PSO + { + none, + {SCAN_RANGE, 1u, SERD_ORDER_OPS}, // ??O + {SCAN_RANGE, 1u, SERD_ORDER_PSO}, // ?P? + {SCAN_RANGE, 2u, SERD_ORDER_OPS}, // ?PO + {SCAN_RANGE, 1u, SERD_ORDER_SPO}, // S?? + {SCAN_RANGE, 2u, SERD_ORDER_SOP}, // S?O + {SCAN_RANGE, 2u, SERD_ORDER_SPO}, // SP? + {SCAN_RANGE, 3u, default_order} // SPO + }, + + // Alternate perfect strategies: adds OSP and POS + { + none, // ??? + {SCAN_RANGE, 1u, SERD_ORDER_OSP}, // ??O + {SCAN_RANGE, 1u, SERD_ORDER_POS}, // ?P? + {SCAN_RANGE, 2u, SERD_ORDER_POS}, // ?PO + {SCAN_RANGE, 1u, SERD_ORDER_SOP}, // S?? + {SCAN_RANGE, 2u, SERD_ORDER_OSP}, // S?O + {SCAN_RANGE, 2u, SERD_ORDER_PSO}, // SP? + none // SPO + }, + + // Preferred partial prefix strategies + { + none, // ??? + none, // ??O + none, // ?P? + {FILTER_RANGE, 1u, SERD_ORDER_PSO}, // ?PO + none, // S?? + {FILTER_RANGE, 1u, SERD_ORDER_SPO}, // S?O + {FILTER_RANGE, 1u, SERD_ORDER_SOP}, // SP? + none // SPO + }, + + // Alternate partial prefix strategies + { + none, // ??? + none, // ??O + none, // ?P? + {FILTER_RANGE, 1u, SERD_ORDER_OSP}, // ?PO + none, // S?? + {FILTER_RANGE, 1u, SERD_ORDER_OPS}, // S?O + {FILTER_RANGE, 1u, SERD_ORDER_POS}, // SP? + none // SPO + }, + }; + + // Construct a 3-bit signature for this triple pattern + const SerdPatternSignature sig = + (SerdPatternSignature)(((with_s ? 1u : 0u) << 2u) + // + ((with_p ? 1u : 0u) << 1u) + // + ((with_o ? 1u : 0u))); + + if (!sig && !with_g) { + return none; + } + + // Try to find a strategy we can support, from most to least preferred + for (unsigned t = 0u; t < N_STRATEGY_TIERS; ++t) { + ScanStrategy strategy = strategies[t][sig]; + const SerdStatementOrder triple_order = strategy.order; + + assert(strategy.order < SERD_ORDER_GSPO); + + if (strategy.n_prefix > 0) { + if (with_g) { + const SerdStatementOrder quad_order = + (SerdStatementOrder)(triple_order + 6u); + + if (model->indices[quad_order]) { + strategy.order = quad_order; + ++strategy.n_prefix; + return strategy; + } + } + + if (model->indices[triple_order]) { + return strategy; + } + } + } + + // Indices don't help with the triple, try to at least stay in the graph + if (with_g) { + for (unsigned i = SERD_ORDER_GSPO; i <= SERD_ORDER_GPOS; ++i) { + if (model->indices[i]) { + const ScanMode mode = sig == 0x000 ? SCAN_RANGE : FILTER_RANGE; + const ScanStrategy strategy = {mode, 1u, (SerdStatementOrder)i}; + + return strategy; + } + } + } + + // All is lost, regress to linear search + const ScanStrategy linear = {FILTER_EVERYTHING, 0u, model->default_order}; + return linear; +} + +static SerdCursor +serd_model_search(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + // Build a pattern of interned nodes + const SerdQuad pattern = {serd_nodes_get(model->nodes, s), + serd_nodes_get(model->nodes, p), + serd_nodes_get(model->nodes, o), + serd_nodes_get(model->nodes, g)}; + + // If some node isn't in the model at all, no need to search for statements + const int n_given = !!s + !!p + !!o + !!g; + if (n_given != (!!pattern[0] + !!pattern[1] + !!pattern[2] + !!pattern[3])) { + return model->end; + } + + // Determine the best available search strategy + const ScanStrategy strategy = serd_model_strategy(model, s, p, o, g); + const SerdStatementOrder order = strategy.order; + ZixBTree* const index = model->indices[order]; + + // Issue any performance warnings, and return early for degenerate cases + switch (strategy.mode) { + case SCAN_EVERYTHING: + return make_begin_cursor(model, order); + case SCAN_RANGE: + break; + case FILTER_EVERYTHING: + log_bad_index(model, "using effectively linear", order, s, p, o, g); + return serd_cursor_make(model, zix_btree_begin(index), pattern, strategy); + case FILTER_RANGE: + log_bad_index(model, "filtering partial", order, s, p, o, g); + break; + } + + // Find the first statement matching the pattern in the index + ZixBTreeIter iter = zix_btree_end_iter; + zix_btree_lower_bound(index, + serd_model_pattern_comparator(model, order), + orderings[order], + pattern, + &iter); + + return (!zix_btree_iter_is_end(iter) && + serd_iter_in_range(iter, pattern, strategy)) + ? serd_cursor_make(model, iter, pattern, strategy) + : model->end; +} + +SerdCursor* +serd_model_find(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + const SerdCursor cursor = serd_model_search(model, s, p, o, g); + + return zix_btree_iter_is_end(cursor.iter) ? NULL : serd_cursor_copy(&cursor); +} + +const SerdNode* +serd_model_get(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + const SerdStatement* const statement = + serd_model_get_statement(model, s, p, o, g); + + return !statement ? NULL + : !s ? serd_statement_subject(statement) + : !p ? serd_statement_predicate(statement) + : !o ? serd_statement_object(statement) + : !g ? serd_statement_graph(statement) + : NULL; +} + +const SerdStatement* +serd_model_get_statement(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + if ((bool)s + (bool)p + (bool)o != 2 && + (bool)s + (bool)p + (bool)o + (bool)g != 3) { + return NULL; + } + + const SerdCursor i = serd_model_search(model, s, p, o, g); + + return serd_cursor_get(&i); +} + +size_t +serd_model_count(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + SerdCursor i = serd_model_search(model, s, p, o, g); + size_t count = 0; + + for (; !serd_cursor_is_end(&i); serd_cursor_advance(&i)) { + ++count; + } + + return count; +} + +bool +serd_model_ask(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + const SerdCursor c = serd_model_search(model, s, p, o, g); + + return !serd_cursor_is_end(&c); +} + +static SerdCaret* +serd_model_intern_caret(SerdModel* const model, const SerdCaret* const caret) +{ + if (caret) { + SerdCaret* copy = (SerdCaret*)calloc(1, sizeof(SerdCaret)); + + copy->file = serd_nodes_intern(model->nodes, caret->file); + copy->line = caret->line; + copy->col = caret->col; + return copy; + } + + return NULL; +} + +SerdStatus +serd_model_add_with_caret(SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g, + const SerdCaret* const caret) +{ + SerdStatement* const statement = serd_statement_new(s, p, o, g, NULL); + if (!statement) { + return SERD_ERR_UNKNOWN; + } + + statement->caret = serd_model_intern_caret(model, caret); + + bool added = false; + for (unsigned i = 0u; i < N_STATEMENT_ORDERS; ++i) { + if (model->indices[i]) { + if (!zix_btree_insert(model->indices[i], statement)) { + added = true; + } else if ((SerdStatementOrder)i == model->default_order) { + break; // Statement already indexed + } + } + } + + ++model->version; + if (added) { + return SERD_SUCCESS; + } + + serd_model_drop_statement(model, statement); + return SERD_FAILURE; +} + +SerdStatus +serd_model_add(SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + return serd_model_add_with_caret(model, + serd_nodes_intern(model->nodes, s), + serd_nodes_intern(model->nodes, p), + serd_nodes_intern(model->nodes, o), + serd_nodes_intern(model->nodes, g), + NULL); +} + +SerdStatus +serd_model_insert(SerdModel* const model, const SerdStatement* const statement) +{ + SerdNodes* const nodes = model->nodes; + + return serd_model_add_with_caret( + model, + serd_nodes_intern(nodes, serd_statement_subject(statement)), + serd_nodes_intern(nodes, serd_statement_predicate(statement)), + serd_nodes_intern(nodes, serd_statement_object(statement)), + serd_nodes_intern(nodes, serd_statement_graph(statement)), + serd_statement_caret(statement)); +} + +SerdStatus +serd_model_insert_statements(SerdModel* const model, SerdCursor* const range) +{ + const SerdStatement* statement = serd_cursor_get(range); + SerdStatus st = SERD_SUCCESS; + + while (!st && statement) { + if (!(st = serd_model_insert(model, statement))) { + st = serd_cursor_advance(range); + } + + statement = serd_cursor_get(range); + } + + return st; +} + +SerdStatus +serd_model_erase(SerdModel* const model, SerdCursor* const cursor) +{ + const SerdStatement* statement = serd_cursor_get(cursor); + SerdStatement* removed = NULL; + ZixStatus zst = ZIX_STATUS_SUCCESS; + + if (!statement) { + return SERD_FAILURE; + } + + // Erase from the index associated with this cursor + zst = zix_btree_remove(model->indices[cursor->strategy.order], + statement, + (void**)&removed, + &cursor->iter); + + // No error should be possible with a valid cursor + assert(!zst); + assert(removed); + assert(removed == statement); + + for (unsigned i = SERD_ORDER_SPO; i <= SERD_ORDER_GPOS; ++i) { + ZixBTree* index = model->indices[i]; + + if (index && i != (unsigned)cursor->strategy.order) { + SerdStatement* ignored = NULL; + ZixBTreeIter next = zix_btree_end_iter; + zst = zix_btree_remove(index, statement, (void**)&ignored, &next); + + assert(zst == ZIX_STATUS_SUCCESS || zst == ZIX_STATUS_NOT_FOUND); + assert(!ignored || ignored == removed); + } + } + serd_cursor_scan_next(cursor); + + serd_model_drop_statement(model, removed); + cursor->version = ++model->version; + + (void)zst; + return SERD_SUCCESS; +} + +SerdStatus +serd_model_erase_statements(SerdModel* const model, SerdCursor* const range) +{ + SerdStatus st = SERD_SUCCESS; + + while (!st && !serd_cursor_is_end(range)) { + st = serd_model_erase(model, range); + } + + return st; +} + +SerdStatus +serd_model_clear(SerdModel* const model) +{ + SerdCursor i = make_begin_cursor(model, model->default_order); + + while (!serd_cursor_is_end(&i)) { + serd_model_erase(model, &i); + } + + return SERD_SUCCESS; +} diff --git a/src/model.h b/src/model.h new file mode 100644 index 00000000..5fbf6209 --- /dev/null +++ b/src/model.h @@ -0,0 +1,37 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_MODEL_H +#define SERD_MODEL_H + +#include "cursor.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include + +struct SerdModelImpl { + SerdWorld* world; ///< World this model is a part of + SerdNodes* nodes; ///< Interned nodes in this model + ZixBTree* indices[12]; ///< Trees of SerdStatement pointers + SerdCursor end; ///< End cursor (always the same) + size_t version; ///< Version incremented on every change + SerdStatementOrder default_order; ///< Order for main statement-owning index + SerdModelFlags flags; ///< Active indices and features +}; + +#endif // SERD_MODEL_H diff --git a/src/reader.c b/src/reader.c index 5afc28ff..91e61f72 100644 --- a/src/reader.c +++ b/src/reader.c @@ -91,7 +91,7 @@ tolerate_status(const SerdReader* const reader, const SerdStatus status) if (status == SERD_ERR_INTERNAL || status == SERD_ERR_OVERFLOW || status == SERD_ERR_BAD_WRITE || status == SERD_ERR_NO_DATA || - status == SERD_ERR_BAD_CALL) { + status == SERD_ERR_BAD_CALL || status == SERD_ERR_BAD_CURSOR) { return false; } diff --git a/src/serdi.c b/src/serdi.c index 2acae921..97601b83 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -61,11 +61,12 @@ print_usage(const char* const name, const bool error) fprintf(os, " -b Fast bulk output for large serialisations.\n"); fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); fprintf(os, " -e Eat input one character at a time.\n"); - fprintf(os, " -f Keep full URIs in input (don't qualify).\n"); + fprintf(os, " -f Fast and loose mode (possibly ugly output).\n"); fprintf(os, " -h Display this help and exit.\n"); fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); fprintf(os, " -k BYTES Parser stack size.\n"); fprintf(os, " -l Lax (non-strict) parsing.\n"); + fprintf(os, " -m Build a model in memory before writing.\n"); fprintf(os, " -o SYNTAX Output syntax: empty/turtle/ntriples/nquads.\n"); fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); fprintf(os, " -q Suppress all output except data.\n"); @@ -149,7 +150,9 @@ main(int argc, char** argv) SerdWriterFlags writer_flags = 0; bool bulk_read = true; bool bulk_write = false; + bool no_inline = false; bool osyntax_set = false; + bool use_model = false; bool quiet = false; size_t stack_size = 4194304; const char* input_string = NULL; @@ -173,12 +176,15 @@ main(int argc, char** argv) } else if (opt == 'e') { bulk_read = false; } else if (opt == 'f') { + no_inline = true; writer_flags |= (SERD_WRITE_UNQUALIFIED | SERD_WRITE_UNRESOLVED); } else if (opt == 'h') { return print_usage(prog, false); } else if (opt == 'l') { reader_flags |= SERD_READ_LAX; writer_flags |= SERD_WRITE_LAX; + } else if (argv[a][1] == 'm') { + use_model = true; } else if (opt == 'q') { quiet = true; } else if (opt == 't') { @@ -312,6 +318,9 @@ main(int argc, char** argv) } #endif + const SerdDescribeFlags describe_flags = + no_inline ? SERD_NO_INLINE_OBJECTS : 0u; + const size_t block_size = bulk_write ? 4096u : 1u; SerdByteSink* const byte_sink = out_filename @@ -326,6 +335,30 @@ main(int argc, char** argv) SerdWriter* const writer = serd_writer_new(world, output_syntax, writer_flags, env, byte_sink); + SerdModel* model = NULL; + SerdSink* inserter = NULL; + const SerdSink* sink = NULL; + if (use_model) { + const SerdModelFlags flags = (input_has_graphs ? SERD_STORE_GRAPHS : 0u); + + model = serd_model_new(world, SERD_ORDER_SPO, flags); + if (input_has_graphs) { + serd_model_add_index(model, SERD_ORDER_GSPO); + } + + if (!no_inline) { + serd_model_add_index(model, SERD_ORDER_OPS); + if (input_has_graphs) { + serd_model_add_index(model, SERD_ORDER_GOPS); + } + } + + inserter = serd_inserter_new(model, NULL); + sink = inserter; + } else { + sink = serd_writer_sink(writer); + } + if (quiet) { serd_set_log_func(world, serd_quiet_log_func, NULL); } @@ -349,7 +382,7 @@ main(int argc, char** argv) input_syntax ? input_syntax : SERD_TRIG, reader_flags, env, - serd_writer_sink(writer), + sink, stack_size); serd_reader_add_blank_prefix(reader, add_prefix); @@ -394,7 +427,7 @@ main(int argc, char** argv) input_syntax, reader_flags, env, - serd_writer_sink(writer), + sink, stack_size, inputs[i], n_inputs > 1 ? prefix : add_prefix, @@ -404,6 +437,26 @@ main(int argc, char** argv) } free(prefix); + if (st <= SERD_FAILURE && use_model) { + const SerdSink* writer_sink = serd_writer_sink(writer); + SerdCursor* everything = serd_model_begin_ordered( + model, input_has_graphs ? SERD_ORDER_GSPO : SERD_ORDER_SPO); + + serd_env_write_prefixes(env, writer_sink); + + st = serd_describe_range( + everything, + writer_sink, + describe_flags | + ((output_syntax == SERD_NTRIPLES || output_syntax == SERD_NQUADS) + ? SERD_NO_INLINE_OBJECTS + : 0u)); + + serd_cursor_free(everything); + } + + serd_sink_free(inserter); + serd_model_free(model); serd_writer_free(writer); serd_node_free(input_name); serd_env_free(env); diff --git a/src/statement.c b/src/statement.c index d6571c6c..7bb2486f 100644 --- a/src/statement.c +++ b/src/statement.c @@ -50,7 +50,13 @@ serd_statement_new(const SerdNode* const s, const SerdNode* const g, const SerdCaret* const caret) { - assert(serd_statement_is_valid(s, p, o, g)); + assert(s); + assert(p); + assert(o); + + if (!serd_statement_is_valid(s, p, o, g)) { + return NULL; + } SerdStatement* statement = (SerdStatement*)malloc(sizeof(SerdStatement)); if (statement) { diff --git a/src/string.c b/src/string.c index 2d25fb80..11b53050 100644 --- a/src/string.c +++ b/src/string.c @@ -38,6 +38,8 @@ serd_strerror(const SerdStatus status) return "Invalid syntax"; case SERD_ERR_BAD_ARG: return "Invalid argument"; + case SERD_ERR_BAD_CURSOR: + return "Invalid cursor"; case SERD_ERR_NOT_FOUND: return "Not found"; case SERD_ERR_ID_CLASH: @@ -58,6 +60,8 @@ serd_strerror(const SerdStatus status) return "Invalid call"; case SERD_ERR_BAD_URI: return "Invalid or unresolved URI"; + case SERD_ERR_BAD_INDEX: + return "No optimal model index available"; } return "Unknown error"; diff --git a/test/.clang-tidy b/test/.clang-tidy index dd609218..d8601571 100644 --- a/test/.clang-tidy +++ b/test/.clang-tidy @@ -10,6 +10,7 @@ Checks: > -clang-analyzer-nullability.NullabilityBase, -clang-analyzer-nullability.NullableDereferenced, -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, + -clang-analyzer-valist.Uninitialized, -cppcoreguidelines-avoid-non-const-global-variables, -hicpp-signed-bitwise, -llvmlibc-*, diff --git a/test/meson.build b/test/meson.build index 77a9ba57..c4dc4b3b 100644 --- a/test/meson.build +++ b/test/meson.build @@ -8,9 +8,11 @@ unit_tests = [ 'byte_sink', 'byte_source', 'caret', + 'cursor', 'env', 'free_null', 'log', + 'model', 'node', 'node_syntax', 'nodes', diff --git a/test/run_test_suite.py b/test/run_test_suite.py index f23f29e6..f9524a9c 100755 --- a/test/run_test_suite.py +++ b/test/run_test_suite.py @@ -230,6 +230,23 @@ def _file_equals(patha, pathb): return _show_diff(fa.readlines(), fb.readlines(), patha, pathb) +def _file_lines_equal(patha, pathb, subst_from="", subst_to=""): + import io + + for path in (patha, pathb): + if not os.access(path, os.F_OK): + sys.stderr.write("error: missing file %s" % path) + return False + + la = sorted(set(io.open(patha, encoding="utf-8").readlines())) + lb = sorted(set(io.open(pathb, encoding="utf-8").readlines())) + if la != lb: + _show_diff(la, lb, patha, pathb) + return False + + return True + + def test_suite( manifest_path, base_uri, @@ -333,6 +350,31 @@ def test_suite( command_prefix, ) + # Run model test for positive test (must succeed) + out_filename = os.path.join( + out_test_dir, test_name + ".model.out" + ) + + model_command = command_prefix + [ + "-m", + "-a", + "-o", + osyntax, + "-w", + out_filename, + "-I", + test_uri, + test_path, + ] + + proc = subprocess.run(model_command, check=True) + + if proc.returncode == 0 and ( + (mf + "result") in model[test] + ): + if not _file_lines_equal(check_path, out_filename): + results.n_failures += 1 + else: # Negative test with open(out_filename, "w") as stdout: with tempfile.TemporaryFile() as stderr: diff --git a/test/test_cursor.c b/test/test_cursor.c new file mode 100644 index 00000000..cade0621 --- /dev/null +++ b/test/test_cursor.c @@ -0,0 +1,114 @@ +/* + Copyright 2011-2021 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#undef NDEBUG + +#include "serd/serd.h" + +#include +#include + +static void +test_copy(void) +{ + assert(!serd_cursor_copy(NULL)); + + SerdWorld* const world = serd_world_new(); + SerdModel* const model = serd_model_new(world, SERD_ORDER_SPO, 0u); + SerdCursor* const begin = serd_model_begin(model); + SerdCursor* const copy = serd_cursor_copy(begin); + + assert(serd_cursor_equals(copy, begin)); + + serd_cursor_free(copy); + serd_cursor_free(begin); + serd_model_free(model); + serd_world_free(world); +} + +static void +test_comparison(void) +{ + SerdWorld* const world = serd_world_new(); + SerdNodes* const nodes = serd_world_nodes(world); + SerdModel* const model = serd_model_new(world, SERD_ORDER_SPO, 0u); + + serd_model_add_index(model, SERD_ORDER_OPS); + + const SerdNode* const a = + serd_nodes_uri(nodes, SERD_STRING("http://example.org/a")); + const SerdNode* const b = + serd_nodes_uri(nodes, SERD_STRING("http://example.org/b")); + const SerdNode* const c = + serd_nodes_uri(nodes, SERD_STRING("http://example.org/c")); + + // Add a single statement + assert(!serd_model_add(model, a, b, c, NULL)); + + // Make cursors that point to the statement but via different patterns + SerdCursor* const c1 = serd_model_find(model, a, NULL, NULL, NULL); + SerdCursor* const c2 = serd_model_find(model, a, b, NULL, NULL); + SerdCursor* const c3 = serd_model_find(model, NULL, b, c, NULL); + + // Ensure that they refer to the same statement but are not equal + assert(serd_cursor_get(c1) == serd_cursor_get(c2)); + assert(serd_cursor_get(c2) == serd_cursor_get(c3)); + assert(!serd_cursor_equals(c1, c2)); + assert(!serd_cursor_equals(c2, c3)); + assert(!serd_cursor_equals(c1, c3)); + + // Check that none are equal to begin (which has a different mode) or end + SerdCursor* const begin = serd_model_begin(model); + assert(!serd_cursor_equals(c1, begin)); + assert(!serd_cursor_equals(c2, begin)); + assert(!serd_cursor_equals(c3, begin)); + assert(!serd_cursor_equals(c1, serd_model_end(model))); + assert(!serd_cursor_equals(c2, serd_model_end(model))); + assert(!serd_cursor_equals(c3, serd_model_end(model))); + serd_cursor_free(begin); + + // Check that a cursor that points to it via the same pattern is equal + SerdCursor* const c4 = serd_model_find(model, a, b, NULL, NULL); + assert(serd_cursor_get(c4) == serd_cursor_get(c1)); + assert(serd_cursor_equals(c4, c2)); + assert(!serd_cursor_equals(c4, c3)); + serd_cursor_free(c4); + + // Advance everything to the end + assert(serd_cursor_advance(c1) == SERD_FAILURE); + assert(serd_cursor_advance(c2) == SERD_FAILURE); + assert(serd_cursor_advance(c3) == SERD_FAILURE); + + // Check that they are now equal, and equal to the model's end + assert(serd_cursor_equals(c1, c2)); + assert(serd_cursor_equals(c1, serd_model_end(model))); + assert(serd_cursor_equals(c2, serd_model_end(model))); + + serd_cursor_free(c3); + serd_cursor_free(c2); + serd_cursor_free(c1); + serd_model_free(model); + serd_world_free(world); +} + +int +main(void) +{ + test_copy(); + test_comparison(); + + return 0; +} diff --git a/test/test_free_null.c b/test/test_free_null.c index 28f7563f..1fa87979 100644 --- a/test/test_free_null.c +++ b/test/test_free_null.c @@ -33,6 +33,9 @@ main(void) serd_reader_free(NULL); serd_writer_free(NULL); serd_nodes_free(NULL); + serd_model_free(NULL); + serd_statement_free(NULL); + serd_cursor_free(NULL); serd_caret_free(NULL); return 0; diff --git a/test/test_model.c b/test/test_model.c new file mode 100644 index 00000000..4cedfb2b --- /dev/null +++ b/test/test_model.c @@ -0,0 +1,1396 @@ +/* + Copyright 2011-2021 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#undef NDEBUG + +#include +#include +#include +#include +#include + +#include "serd/serd.h" + +#define WILDCARD_NODE NULL + +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define RDF_FIRST NS_RDF "first" +#define RDF_REST NS_RDF "rest" +#define RDF_NIL NS_RDF "nil" + +#define N_OBJECTS_PER 2U + +typedef const SerdNode* Quad[4]; + +typedef struct { + Quad query; + int expected_num_results; +} QueryTest; + +static const SerdNode* +uri(SerdWorld* world, const unsigned num) +{ + char str[] = "http://example.org/0000000000"; + + snprintf(str + 19, 11, "%010u", num); + + return serd_nodes_uri(serd_world_nodes(world), SERD_STRING(str)); +} + +static int +generate(SerdWorld* world, + SerdModel* model, + size_t n_quads, + const SerdNode* graph) +{ + SerdNodes* nodes = serd_world_nodes(world); + SerdStatus st = SERD_SUCCESS; + + for (unsigned i = 0; i < n_quads; ++i) { + unsigned num = (i * N_OBJECTS_PER) + 1U; + + const SerdNode* ids[2 + N_OBJECTS_PER]; + for (unsigned j = 0; j < 2 + N_OBJECTS_PER; ++j) { + ids[j] = uri(world, num++); + } + + for (unsigned j = 0; j < N_OBJECTS_PER; ++j) { + st = serd_model_add(model, ids[0], ids[1], ids[2 + j], graph); + assert(!st); + } + } + + // Add some literals + + // (98 4 "hello") and (98 4 "hello"^^<5>) + const SerdNode* hello = serd_nodes_string(nodes, SERD_STRING("hello")); + + const SerdNode* hello_gb = serd_nodes_literal( + nodes, SERD_STRING("hello"), SERD_HAS_LANGUAGE, SERD_STRING("en-gb")); + + const SerdNode* hello_us = serd_nodes_literal( + nodes, SERD_STRING("hello"), SERD_HAS_LANGUAGE, SERD_STRING("en-us")); + + const SerdNode* hello_t4 = + serd_nodes_literal(nodes, + SERD_STRING("hello"), + SERD_HAS_DATATYPE, + serd_node_string_view(uri(world, 4))); + + const SerdNode* hello_t5 = + serd_nodes_literal(nodes, + SERD_STRING("hello"), + SERD_HAS_DATATYPE, + serd_node_string_view(uri(world, 5))); + + assert(!serd_model_add(model, uri(world, 98), uri(world, 4), hello, graph)); + assert( + !serd_model_add(model, uri(world, 98), uri(world, 4), hello_t5, graph)); + + // (96 4 "hello"^^<4>) and (96 4 "hello"^^<5>) + assert( + !serd_model_add(model, uri(world, 96), uri(world, 4), hello_t4, graph)); + assert( + !serd_model_add(model, uri(world, 96), uri(world, 4), hello_t5, graph)); + + // (94 5 "hello") and (94 5 "hello"@en-gb) + assert(!serd_model_add(model, uri(world, 94), uri(world, 5), hello, graph)); + assert( + !serd_model_add(model, uri(world, 94), uri(world, 5), hello_gb, graph)); + + // (92 6 "hello"@en-us) and (92 6 "hello"@en-gb) + assert( + !serd_model_add(model, uri(world, 92), uri(world, 6), hello_us, graph)); + assert( + !serd_model_add(model, uri(world, 92), uri(world, 6), hello_gb, graph)); + + // (14 6 "bonjour"@fr) and (14 6 "salut"@fr) + const SerdNode* const bonjour = serd_nodes_literal( + nodes, SERD_STRING("bonjour"), SERD_HAS_LANGUAGE, SERD_STRING("fr")); + + const SerdNode* const salut = serd_nodes_literal( + nodes, SERD_STRING("salut"), SERD_HAS_LANGUAGE, SERD_STRING("fr")); + + assert(!serd_model_add(model, uri(world, 14), uri(world, 6), bonjour, graph)); + assert(!serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)); + + // Attempt to add duplicates + assert(serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)); + + // Add a blank node subject + const SerdNode* ablank = serd_nodes_blank(nodes, SERD_STRING("ablank")); + + assert(!serd_model_add(model, ablank, uri(world, 6), salut, graph)); + + // Add statement with URI object + assert(!serd_model_add(model, ablank, uri(world, 6), uri(world, 7), graph)); + + return EXIT_SUCCESS; +} + +static int +test_read(SerdWorld* world, + SerdModel* model, + const SerdNode* g, + const unsigned n_quads) +{ + SerdNodes* const nodes = serd_nodes_new(); + + SerdCursor* cursor = serd_model_begin(model); + const SerdStatement* prev = NULL; + for (; !serd_cursor_equals(cursor, serd_model_end(model)); + serd_cursor_advance(cursor)) { + const SerdStatement* statement = serd_cursor_get(cursor); + assert(statement); + assert(serd_statement_subject(statement)); + assert(serd_statement_predicate(statement)); + assert(serd_statement_object(statement)); + assert(!serd_statement_equals(statement, prev)); + assert(!serd_statement_equals(prev, statement)); + prev = statement; + } + + // Attempt to increment past end + assert(serd_cursor_advance(cursor) == SERD_ERR_BAD_CURSOR); + serd_cursor_free(cursor); + + const SerdStringView s = SERD_STRING("hello"); + + const SerdNode* plain_hello = serd_nodes_string(nodes, s); + + const SerdNode* type4_hello = serd_nodes_literal( + nodes, s, SERD_HAS_DATATYPE, serd_node_string_view(uri(world, 4))); + + const SerdNode* type5_hello = serd_nodes_literal( + nodes, s, SERD_HAS_DATATYPE, serd_node_string_view(uri(world, 5))); + + const SerdNode* gb_hello = + serd_nodes_literal(nodes, s, SERD_HAS_LANGUAGE, SERD_STRING("en-gb")); + + const SerdNode* us_hello = + serd_nodes_literal(nodes, s, SERD_HAS_LANGUAGE, SERD_STRING("en-us")); + +#define NUM_PATTERNS 18 + + QueryTest patterns[NUM_PATTERNS] = { + {{NULL, NULL, NULL}, (int)(n_quads * N_OBJECTS_PER) + 12}, + {{uri(world, 1), WILDCARD_NODE, WILDCARD_NODE}, 2}, + {{uri(world, 9), uri(world, 9), uri(world, 9)}, 0}, + {{uri(world, 1), uri(world, 2), uri(world, 4)}, 1}, + {{uri(world, 3), uri(world, 4), WILDCARD_NODE}, 2}, + {{WILDCARD_NODE, uri(world, 2), uri(world, 4)}, 1}, + {{WILDCARD_NODE, WILDCARD_NODE, uri(world, 4)}, 1}, + {{uri(world, 1), WILDCARD_NODE, WILDCARD_NODE}, 2}, + {{uri(world, 1), WILDCARD_NODE, uri(world, 4)}, 1}, + {{WILDCARD_NODE, uri(world, 2), WILDCARD_NODE}, 2}, + {{uri(world, 98), uri(world, 4), plain_hello}, 1}, + {{uri(world, 98), uri(world, 4), type5_hello}, 1}, + {{uri(world, 96), uri(world, 4), type4_hello}, 1}, + {{uri(world, 96), uri(world, 4), type5_hello}, 1}, + {{uri(world, 94), uri(world, 5), plain_hello}, 1}, + {{uri(world, 94), uri(world, 5), gb_hello}, 1}, + {{uri(world, 92), uri(world, 6), gb_hello}, 1}, + {{uri(world, 92), uri(world, 6), us_hello}, 1}}; + + Quad match = {uri(world, 1), uri(world, 2), uri(world, 4), g}; + assert(serd_model_ask(model, match[0], match[1], match[2], match[3])); + + Quad nomatch = {uri(world, 1), uri(world, 2), uri(world, 9), g}; + assert( + !serd_model_ask(model, nomatch[0], nomatch[1], nomatch[2], nomatch[3])); + + assert(!serd_model_get(model, NULL, NULL, uri(world, 3), g)); + assert(!serd_model_get(model, uri(world, 1), uri(world, 99), NULL, g)); + + assert(serd_node_equals( + serd_model_get(model, uri(world, 1), uri(world, 2), NULL, g), + uri(world, 3))); + assert(serd_node_equals( + serd_model_get(model, uri(world, 1), NULL, uri(world, 3), g), + uri(world, 2))); + assert(serd_node_equals( + serd_model_get(model, NULL, uri(world, 2), uri(world, 3), g), + uri(world, 1))); + if (g) { + assert(serd_node_equals( + serd_model_get(model, uri(world, 1), uri(world, 2), uri(world, 3), NULL), + g)); + } + + for (unsigned i = 0; i < NUM_PATTERNS; ++i) { + QueryTest test = patterns[i]; + Quad pat = {test.query[0], test.query[1], test.query[2], g}; + + SerdCursor* range = serd_model_find(model, pat[0], pat[1], pat[2], pat[3]); + int num_results = 0; + for (; !serd_cursor_is_end(range); serd_cursor_advance(range)) { + ++num_results; + + const SerdStatement* first = serd_cursor_get(range); + assert(first); + assert(serd_statement_matches(first, pat[0], pat[1], pat[2], pat[3])); + } + + serd_cursor_free(range); + + assert(num_results == test.expected_num_results); + } + + // Query blank node subject + + const SerdNode* ablank = serd_nodes_blank(nodes, SERD_STRING("ablank")); + + Quad pat = {ablank, 0, 0}; + int num_results = 0; + SerdCursor* range = serd_model_find(model, pat[0], pat[1], pat[2], pat[3]); + for (; !serd_cursor_is_end(range); serd_cursor_advance(range)) { + ++num_results; + const SerdStatement* statement = serd_cursor_get(range); + assert(serd_statement_matches(statement, pat[0], pat[1], pat[2], pat[3])); + } + serd_cursor_free(range); + + assert(num_results == 2); + + // Test nested queries + const SerdNode* last_subject = 0; + range = serd_model_find(model, NULL, NULL, NULL, NULL); + for (; !serd_cursor_is_end(range); serd_cursor_advance(range)) { + const SerdStatement* statement = serd_cursor_get(range); + const SerdNode* subject = serd_statement_subject(statement); + if (subject == last_subject) { + continue; + } + + Quad subpat = {subject, 0, 0}; + SerdCursor* const subrange = + serd_model_find(model, subpat[0], subpat[1], subpat[2], subpat[3]); + + assert(subrange); + + const SerdStatement* substatement = serd_cursor_get(subrange); + uint64_t num_sub_results = 0; + assert(serd_statement_subject(substatement) == subject); + for (; !serd_cursor_is_end(subrange); serd_cursor_advance(subrange)) { + const SerdStatement* const front = serd_cursor_get(subrange); + assert(front); + + assert(serd_statement_matches( + front, subpat[0], subpat[1], subpat[2], subpat[3])); + + ++num_sub_results; + } + serd_cursor_free(subrange); + assert(num_sub_results == N_OBJECTS_PER); + + uint64_t count = serd_model_count(model, subject, 0, 0, 0); + assert(count == num_sub_results); + + last_subject = subject; + } + serd_cursor_free(range); + + serd_nodes_free(nodes); + return 0; +} + +static SerdStatus +expected_error(void* const handle, + const SerdLogLevel level, + const size_t n_fields, + const SerdLogField* const fields, + const SerdStringView message) +{ + (void)level; + (void)n_fields; + (void)fields; + (void)handle; + + fprintf(stderr, "expected: %s\n", message.buf); + return SERD_SUCCESS; +} + +static SerdStatus +ignore_only_index_error(void* const handle, + const SerdLogLevel level, + const size_t n_fields, + const SerdLogField* const fields, + const SerdStringView message) +{ + (void)handle; + (void)level; + (void)n_fields; + (void)fields; + + if (!strstr(message.buf, "index")) { + fprintf(stderr, "error: %s\n", message.buf); + } + + assert(strstr(message.buf, "index")); + + return SERD_SUCCESS; +} + +static int +test_free_null(SerdWorld* world, const unsigned n_quads) +{ + (void)world; + (void)n_quads; + + serd_model_free(NULL); // Shouldn't crash + return 0; +} + +static int +test_get_world(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + assert(serd_model_world(model) == world); + serd_model_free(model); + return 0; +} + +static int +test_get_default_order(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* const model1 = serd_model_new(world, SERD_ORDER_SPO, 0u); + SerdModel* const model2 = serd_model_new(world, SERD_ORDER_GPSO, 0u); + + assert(serd_model_default_order(model1) == SERD_ORDER_SPO); + assert(serd_model_default_order(model2) == SERD_ORDER_GPSO); + + serd_model_free(model2); + serd_model_free(model1); + return 0; +} + +static int +test_get_flags(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + const SerdModelFlags flags = SERD_STORE_GRAPHS | SERD_STORE_CARETS; + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, flags); + + assert(serd_model_flags(model) & SERD_STORE_GRAPHS); + assert(serd_model_flags(model) & SERD_STORE_CARETS); + serd_model_free(model); + return 0; +} + +static int +test_all_begin(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + SerdCursor* begin = serd_model_begin(model); + SerdCursor* first = serd_model_find(model, NULL, NULL, NULL, NULL); + + assert(serd_cursor_equals(begin, first)); + + serd_cursor_free(first); + serd_cursor_free(begin); + serd_model_free(model); + return 0; +} + +static int +test_begin_ordered(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_GRAPHS); + + assert( + !serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + + SerdCursor* i = serd_model_begin_ordered(model, SERD_ORDER_SPO); + assert(i); + assert(!serd_cursor_is_end(i)); + serd_cursor_free(i); + + i = serd_model_begin_ordered(model, SERD_ORDER_POS); + assert(serd_cursor_is_end(i)); + serd_cursor_free(i); + + serd_model_free(model); + return 0; +} + +static int +test_add_with_iterator(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + + serd_set_log_func(world, expected_error, NULL); + assert( + !serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + + // Add a statement with an active iterator + SerdCursor* iter = serd_model_begin(model); + assert( + !serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 4), 0)); + + // Check that iterator has been invalidated + assert(!serd_cursor_get(iter)); + assert(serd_cursor_advance(iter) == SERD_ERR_BAD_CURSOR); + + serd_cursor_free(iter); + serd_model_free(model); + return 0; +} + +static int +test_add_remove_nodes(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + + assert(serd_model_nodes(model)); + assert(serd_nodes_size(serd_model_nodes(model)) == 0); + + const SerdNode* const a = uri(world, 1); + const SerdNode* const b = uri(world, 2); + const SerdNode* const c = uri(world, 3); + + // Add 2 statements with 3 nodes + assert(!serd_model_add(model, a, b, a, NULL)); + assert(!serd_model_add(model, c, b, c, NULL)); + assert(serd_model_size(model) == 2); + assert(serd_nodes_size(serd_model_nodes(model)) == 3); + + // Remove one statement to leave 2 nodes + SerdCursor* const begin = serd_model_begin(model); + assert(!serd_model_erase(model, begin)); + assert(serd_model_size(model) == 1); + assert(serd_nodes_size(serd_model_nodes(model)) == 2); + serd_cursor_free(begin); + + // Clear the last statement to leave 0 nodes + assert(!serd_model_clear(model)); + assert(serd_nodes_size(serd_model_nodes(model)) == 0); + + serd_model_free(model); + return 0; +} + +static int +test_add_index(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* const model = serd_model_new(world, SERD_ORDER_SPO, 0u); + const SerdNode* const s = uri(world, 0); + const SerdNode* const p = uri(world, 1); + const SerdNode* const o1 = uri(world, 2); + const SerdNode* const o2 = uri(world, 3); + + // Try to add an existing index + assert(serd_model_add_index(model, SERD_ORDER_SPO) == SERD_FAILURE); + + // Add a couple of statements + serd_model_add(model, s, p, o1, NULL); + serd_model_add(model, s, p, o2, NULL); + assert(serd_model_size(model) == 2); + + // Add a new index + assert(!serd_model_add_index(model, SERD_ORDER_PSO)); + + // Count statements via the new index + size_t count = 0u; + SerdCursor* cur = serd_model_find(model, NULL, p, NULL, NULL); + while (!serd_cursor_is_end(cur)) { + ++count; + serd_cursor_advance(cur); + } + serd_cursor_free(cur); + + serd_model_free(model); + assert(count == 2); + return 0; +} + +static int +test_remove_index(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* const model = serd_model_new(world, SERD_ORDER_SPO, 0u); + const SerdNode* const s = uri(world, 0); + const SerdNode* const p = uri(world, 1); + const SerdNode* const o1 = uri(world, 2); + const SerdNode* const o2 = uri(world, 3); + + // Try to remove default and non-existent indices + assert(serd_model_drop_index(model, SERD_ORDER_SPO) == SERD_ERR_BAD_CALL); + assert(serd_model_drop_index(model, SERD_ORDER_PSO) == SERD_FAILURE); + + // Add a couple of statements so that dropping an index isn't trivial + serd_model_add(model, s, p, o1, NULL); + serd_model_add(model, s, p, o2, NULL); + assert(serd_model_size(model) == 2); + + assert(serd_model_add_index(model, SERD_ORDER_PSO) == SERD_SUCCESS); + assert(serd_model_drop_index(model, SERD_ORDER_PSO) == SERD_SUCCESS); + assert(serd_model_drop_index(model, SERD_ORDER_PSO) == SERD_FAILURE); + assert(serd_model_size(model) == 2); + serd_model_free(model); + return 0; +} + +static int +test_inserter(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdNodes* const nodes = serd_nodes_new(); + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + SerdSink* inserter = serd_inserter_new(model, NULL); + + const SerdNode* const s = + serd_nodes_uri(nodes, SERD_STRING("http://example.org/s")); + + const SerdNode* const p = + serd_nodes_uri(nodes, SERD_STRING("http://example.org/p")); + + const SerdNode* const rel = serd_nodes_uri(nodes, SERD_STRING("rel")); + + serd_set_log_func(world, expected_error, NULL); + + assert(serd_sink_write(inserter, 0, s, p, rel, NULL) == SERD_ERR_BAD_ARG); + + serd_sink_free(inserter); + serd_model_free(model); + serd_nodes_free(nodes); + return 0; +} + +static int +test_erase_with_iterator(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + + serd_set_log_func(world, expected_error, NULL); + assert( + !serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + assert( + !serd_model_add(model, uri(world, 4), uri(world, 5), uri(world, 6), 0)); + + // Erase a statement with an active iterator + SerdCursor* iter1 = serd_model_begin(model); + SerdCursor* iter2 = serd_model_begin(model); + assert(!serd_model_erase(model, iter1)); + + // Check that erased iterator points to the next statement + const SerdStatement* const s1 = serd_cursor_get(iter1); + assert(s1); + assert( + serd_statement_matches(s1, uri(world, 4), uri(world, 5), uri(world, 6), 0)); + + // Check that other iterator has been invalidated + assert(!serd_cursor_get(iter2)); + assert(serd_cursor_advance(iter2) == SERD_ERR_BAD_CURSOR); + + // Check that erasing the end iterator does nothing + SerdCursor* const end = serd_cursor_copy(serd_model_end(model)); + assert(serd_model_erase(model, end) == SERD_FAILURE); + + serd_cursor_free(end); + serd_cursor_free(iter2); + serd_cursor_free(iter1); + serd_model_free(model); + return 0; +} + +static int +test_add_erase(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdNodes* const nodes = serd_nodes_new(); + SerdModel* const model = serd_model_new(world, SERD_ORDER_SPO, 0u); + + // Add (s p "hello") + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* hello = serd_nodes_string(nodes, SERD_STRING("hello")); + + assert(!serd_model_add(model, s, p, hello, NULL)); + assert(serd_model_ask(model, s, p, hello, NULL)); + + // Add (s p "hi") + const SerdNode* hi = serd_nodes_string(nodes, SERD_STRING("hi")); + assert(!serd_model_add(model, s, p, hi, NULL)); + assert(serd_model_ask(model, s, p, hi, NULL)); + + // Erase (s p "hi") + SerdCursor* iter = serd_model_find(model, s, p, hi, NULL); + assert(!serd_model_erase(model, iter)); + assert(serd_model_size(model) == 1); + serd_cursor_free(iter); + + // Check that erased statement can not be found + SerdCursor* empty = serd_model_find(model, s, p, hi, NULL); + assert(serd_cursor_is_end(empty)); + serd_cursor_free(empty); + + serd_model_free(model); + serd_nodes_free(nodes); + return 0; +} + +static int +test_add_bad_statement(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdNodes* const nodes = serd_nodes_new(); + const SerdNode* s = serd_nodes_uri(nodes, SERD_STRING("urn:s")); + const SerdNode* p = serd_nodes_uri(nodes, SERD_STRING("urn:p")); + const SerdNode* o = serd_nodes_uri(nodes, SERD_STRING("urn:o")); + + const SerdNode* f = + serd_nodes_uri(nodes, SERD_STRING("file:///tmp/file.ttl")); + + SerdCaret* caret = serd_caret_new(f, 16, 18); + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + + assert(!serd_model_add_with_caret(model, s, p, o, NULL, caret)); + + SerdCursor* const begin = serd_model_begin(model); + const SerdStatement* statement = serd_cursor_get(begin); + assert(statement); + + assert(serd_node_equals(serd_statement_subject(statement), s)); + assert(serd_node_equals(serd_statement_predicate(statement), p)); + assert(serd_node_equals(serd_statement_object(statement), o)); + assert(!serd_statement_graph(statement)); + + const SerdCaret* statement_caret = serd_statement_caret(statement); + assert(statement_caret); + assert(serd_node_equals(serd_caret_name(statement_caret), f)); + assert(serd_caret_line(statement_caret) == 16); + assert(serd_caret_column(statement_caret) == 18); + + assert(!serd_model_erase(model, begin)); + + serd_cursor_free(begin); + serd_model_free(model); + serd_caret_free(caret); + serd_nodes_free(nodes); + return 0; +} + +static int +test_add_with_caret(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdNodes* const nodes = serd_nodes_new(); + const SerdNode* lit = serd_nodes_string(nodes, SERD_STRING("string")); + const SerdNode* uri = serd_nodes_uri(nodes, SERD_STRING("urn:uri")); + const SerdNode* blank = serd_nodes_blank(nodes, SERD_STRING("b1")); + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + + assert(serd_model_add(model, lit, uri, uri, NULL)); + assert(serd_model_add(model, uri, blank, uri, NULL)); + assert(serd_model_add(model, uri, uri, uri, lit)); + + serd_model_free(model); + serd_nodes_free(nodes); + return 0; +} + +static int +test_erase_all(SerdWorld* world, const unsigned n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + serd_model_add_index(model, SERD_ORDER_OSP); + generate(world, model, n_quads, NULL); + + SerdCursor* iter = serd_model_begin(model); + while (!serd_cursor_equals(iter, serd_model_end(model))) { + assert(!serd_model_erase(model, iter)); + } + + assert(serd_model_empty(model)); + + serd_cursor_free(iter); + serd_model_free(model); + return 0; +} + +static int +test_clear(SerdWorld* world, const unsigned n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + generate(world, model, n_quads, NULL); + + serd_model_clear(model); + assert(serd_model_empty(model)); + + serd_model_free(model); + return 0; +} + +static int +test_copy(SerdWorld* world, const unsigned n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + generate(world, model, n_quads, NULL); + + SerdModel* copy = serd_model_copy(model); + assert(serd_model_equals(model, copy)); + + serd_model_free(model); + serd_model_free(copy); + return 0; +} + +static int +test_equals(SerdWorld* world, const unsigned n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + generate(world, model, n_quads, NULL); + serd_model_add( + model, uri(world, 0), uri(world, 1), uri(world, 2), uri(world, 3)); + + assert(serd_model_equals(NULL, NULL)); + assert(!serd_model_equals(NULL, model)); + assert(!serd_model_equals(model, NULL)); + + SerdModel* empty = serd_model_new(world, SERD_ORDER_SPO, 0u); + assert(!serd_model_equals(model, empty)); + + SerdModel* different = serd_model_new(world, SERD_ORDER_SPO, 0u); + generate(world, different, n_quads, NULL); + serd_model_add( + different, uri(world, 1), uri(world, 1), uri(world, 2), uri(world, 3)); + + assert(serd_model_size(model) == serd_model_size(different)); + assert(!serd_model_equals(model, different)); + + serd_model_free(model); + serd_model_free(empty); + serd_model_free(different); + return 0; +} + +static int +test_find_past_end(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* o = uri(world, 3); + assert(!serd_model_add(model, s, p, o, 0)); + assert(serd_model_ask(model, s, p, o, 0)); + + const SerdNode* huge = uri(world, 999); + SerdCursor* range = serd_model_find(model, huge, huge, huge, 0); + assert(serd_cursor_is_end(range)); + + serd_cursor_free(range); + serd_model_free(model); + return 0; +} + +static int +test_find_unknown_node(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + const SerdNode* const s = uri(world, 1); + const SerdNode* const p = uri(world, 2); + const SerdNode* const o = uri(world, 3); + + SerdModel* const model = + serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_GRAPHS); + + // Add one statement + assert(!serd_model_add(model, s, p, o, NULL)); + assert(serd_model_ask(model, s, p, o, NULL)); + + /* Test searching for statements that contain a non-existent node. This is + semantically equivalent to any other non-matching pattern, but can be + implemented with a fast path that avoids searching a statement index + entirely. */ + + const SerdNode* const q = uri(world, 42); + assert(!serd_model_ask(model, s, p, o, q)); + assert(!serd_model_ask(model, s, p, q, NULL)); + assert(!serd_model_ask(model, s, q, o, NULL)); + assert(!serd_model_ask(model, q, p, o, NULL)); + + serd_model_free(model); + return 0; +} + +static int +test_find_graph(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + const SerdNode* const s = uri(world, 1); + const SerdNode* const p = uri(world, 2); + const SerdNode* const o1 = uri(world, 3); + const SerdNode* const o2 = uri(world, 4); + const SerdNode* const g = uri(world, 5); + + for (unsigned indexed = 0u; indexed < 2u; ++indexed) { + SerdModel* const model = + serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_GRAPHS); + + if (indexed) { + serd_model_add_index(model, SERD_ORDER_GSPO); + } + + // Add one statement in a named graph and one in the default graph + assert(!serd_model_add(model, s, p, o1, NULL)); + assert(!serd_model_add(model, s, p, o2, g)); + + // Both statements can be found in the default graph + assert(serd_model_ask(model, s, p, o1, NULL)); + assert(serd_model_ask(model, s, p, o2, NULL)); + + // Only the one statement can be found in the named graph + assert(!serd_model_ask(model, s, p, o1, g)); + assert(serd_model_ask(model, s, p, o2, g)); + + serd_model_free(model); + } + + return 0; +} + +static int +test_range(SerdWorld* world, const unsigned n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + generate(world, model, n_quads, NULL); + + SerdCursor* range1 = serd_model_begin(model); + SerdCursor* range2 = serd_model_begin(model); + + assert(!serd_cursor_is_end(range1)); + assert(serd_cursor_is_end(NULL)); + + assert(serd_cursor_equals(NULL, NULL)); + assert(!serd_cursor_equals(range1, NULL)); + assert(!serd_cursor_equals(NULL, range1)); + assert(serd_cursor_equals(range1, range2)); + + assert(!serd_cursor_advance(range2)); + assert(!serd_cursor_equals(range1, range2)); + + serd_cursor_free(range2); + serd_cursor_free(range1); + serd_model_free(model); + + return 0; +} + +static int +test_triple_index_read(SerdWorld* world, const unsigned n_quads) +{ + serd_set_log_func(world, ignore_only_index_error, NULL); + + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = serd_model_new(world, (SerdStatementOrder)i, 0u); + generate(world, model, n_quads, 0); + assert(!test_read(world, model, 0, n_quads)); + serd_model_free(model); + } + + return 0; +} + +static int +test_quad_index_read(SerdWorld* world, const unsigned n_quads) +{ + serd_set_log_func(world, ignore_only_index_error, NULL); + + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = + serd_model_new(world, (SerdStatementOrder)i, SERD_STORE_GRAPHS); + + const SerdNode* graph = uri(world, 42); + generate(world, model, n_quads, graph); + assert(!test_read(world, model, graph, n_quads)); + serd_model_free(model); + } + + return 0; +} + +static int +test_remove_graph(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_GSPO, SERD_STORE_GRAPHS); + + // Generate a couple of graphs + const SerdNode* graph42 = uri(world, 42); + const SerdNode* graph43 = uri(world, 43); + generate(world, model, 1, graph42); + generate(world, model, 1, graph43); + + // Remove one graph via range + SerdCursor* range = serd_model_find(model, NULL, NULL, NULL, graph43); + SerdStatus st = serd_model_erase_statements(model, range); + assert(!st); + serd_cursor_free(range); + + // Erase the first tuple (an element in the default graph) + SerdCursor* iter = serd_model_begin(model); + assert(!serd_model_erase(model, iter)); + serd_cursor_free(iter); + + // Ensure only the other graph is left + Quad pat = {0, 0, 0, graph42}; + /* fprintf(stderr, "GRAPH: %s\n", serd_node_string(graph42)); */ + for (iter = serd_model_begin(model); + !serd_cursor_equals(iter, serd_model_end(model)); + serd_cursor_advance(iter)) { + const SerdStatement* const s = serd_cursor_get(iter); + assert(s); + /* fprintf(stderr, "STATEMENT GRAPH: %s\n", serd_node_string(pat[3])); */ + assert(serd_statement_matches(s, pat[0], pat[1], pat[2], pat[3])); + } + serd_cursor_free(iter); + + serd_model_free(model); + return 0; +} + +static int +test_default_graph(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* o = uri(world, 3); + const SerdNode* g1 = uri(world, 101); + const SerdNode* g2 = uri(world, 102); + + { + // Make a model that does not store graphs + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + + // Insert a statement into a graph (which will be dropped) + assert(!serd_model_add(model, s, p, o, g1)); + + // Attempt to insert the same statement into another graph + assert(serd_model_add(model, s, p, o, g2) == SERD_FAILURE); + + // Ensure that we only see the statement once + assert(serd_model_count(model, s, p, o, NULL) == 1); + + serd_model_free(model); + } + + { + // Make a model that stores graphs + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_GRAPHS); + + // Insert the same statement into two graphs + assert(!serd_model_add(model, s, p, o, g1)); + assert(!serd_model_add(model, s, p, o, g2)); + + // Ensure we see the statement twice + assert(serd_model_count(model, s, p, o, NULL) == 2); + + serd_model_free(model); + } + + return 0; +} + +static int +test_write_flat_range(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_GRAPHS); + SerdNodes* nodes = serd_nodes_new(); + + const SerdNode* s = serd_nodes_uri(nodes, SERD_STRING("urn:s")); + const SerdNode* p = serd_nodes_uri(nodes, SERD_STRING("urn:p")); + const SerdNode* b1 = serd_nodes_blank(nodes, SERD_STRING("b1")); + const SerdNode* b2 = serd_nodes_blank(nodes, SERD_STRING("b2")); + const SerdNode* o = serd_nodes_uri(nodes, SERD_STRING("urn:o")); + + serd_model_add(model, s, p, b1, NULL); + serd_model_add(model, b1, p, o, NULL); + serd_model_add(model, s, p, b2, NULL); + serd_model_add(model, b2, p, o, NULL); + + SerdBuffer buffer = {NULL, 0}; + SerdEnv* env = serd_env_new(SERD_EMPTY_STRING()); + SerdByteSink* out = serd_byte_sink_new_buffer(&buffer); + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, out); + + SerdCursor* all = serd_model_begin(model); + serd_describe_range(all, serd_writer_sink(writer), SERD_NO_INLINE_OBJECTS); + serd_cursor_free(all); + + serd_writer_finish(writer); + const char* str = serd_buffer_sink_finish(&buffer); + const char* expected = "\n" + " _:b1 ,\n" + " _:b2 .\n" + "\n" + "_:b1\n" + " .\n" + "\n" + "_:b2\n" + " .\n"; + + assert(!strcmp(str, expected)); + + free(buffer.buf); + serd_writer_free(writer); + serd_byte_sink_free(out); + serd_model_free(model); + serd_env_free(env); + serd_nodes_free(nodes); + return 0; +} + +static int +test_write_bad_list(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_GRAPHS); + SerdNodes* nodes = serd_nodes_new(); + + serd_model_add_index(model, SERD_ORDER_OPS); + + const SerdNode* s = serd_nodes_uri(nodes, SERD_STRING("urn:s")); + const SerdNode* p = serd_nodes_uri(nodes, SERD_STRING("urn:p")); + const SerdNode* list1 = serd_nodes_blank(nodes, SERD_STRING("l1")); + const SerdNode* list2 = serd_nodes_blank(nodes, SERD_STRING("l2")); + const SerdNode* nofirst = serd_nodes_blank(nodes, SERD_STRING("nof")); + const SerdNode* norest = serd_nodes_blank(nodes, SERD_STRING("nor")); + const SerdNode* pfirst = serd_nodes_uri(nodes, SERD_STRING(RDF_FIRST)); + const SerdNode* prest = serd_nodes_uri(nodes, SERD_STRING(RDF_REST)); + const SerdNode* val1 = serd_nodes_string(nodes, SERD_STRING("a")); + const SerdNode* val2 = serd_nodes_string(nodes, SERD_STRING("b")); + + // List where second node has no rdf:first + serd_model_add(model, s, p, list1, NULL); + serd_model_add(model, list1, pfirst, val1, NULL); + serd_model_add(model, list1, prest, nofirst, NULL); + + // List where second node has no rdf:rest + serd_model_add(model, s, p, list2, NULL); + serd_model_add(model, list2, pfirst, val1, NULL); + serd_model_add(model, list2, prest, norest, NULL); + serd_model_add(model, norest, pfirst, val2, NULL); + + SerdBuffer buffer = {NULL, 0}; + SerdEnv* env = serd_env_new(SERD_EMPTY_STRING()); + SerdByteSink* out = serd_byte_sink_new_buffer(&buffer); + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, out); + + SerdCursor* all = serd_model_begin(model); + serd_describe_range(all, serd_writer_sink(writer), 0); + serd_cursor_free(all); + + serd_writer_finish(writer); + const char* str = serd_buffer_sink_finish(&buffer); + const char* expected = "\n" + " (\n" + " \"a\"\n" + " ) , (\n" + " \"a\"\n" + " \"b\"\n" + " ) .\n"; + + assert(!strcmp(str, expected)); + + free(buffer.buf); + serd_writer_free(writer); + serd_byte_sink_free(out); + serd_model_free(model); + serd_env_free(env); + serd_nodes_free(nodes); + return 0; +} + +static int +test_write_infinite_list(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_GRAPHS); + SerdNodes* nodes = serd_nodes_new(); + + serd_model_add_index(model, SERD_ORDER_OPS); + + const SerdNode* s = serd_nodes_uri(nodes, SERD_STRING("urn:s")); + const SerdNode* p = serd_nodes_uri(nodes, SERD_STRING("urn:p")); + const SerdNode* list1 = serd_nodes_blank(nodes, SERD_STRING("l1")); + const SerdNode* list2 = serd_nodes_blank(nodes, SERD_STRING("l2")); + + const SerdNode* pfirst = serd_nodes_uri(nodes, SERD_STRING(RDF_FIRST)); + const SerdNode* prest = serd_nodes_uri(nodes, SERD_STRING(RDF_REST)); + const SerdNode* val1 = serd_nodes_string(nodes, SERD_STRING("a")); + const SerdNode* val2 = serd_nodes_string(nodes, SERD_STRING("b")); + + // List with a cycle: list1 -> list2 -> list1 -> list2 ... + serd_model_add(model, s, p, list1, NULL); + serd_model_add(model, list1, pfirst, val1, NULL); + serd_model_add(model, list1, prest, list2, NULL); + serd_model_add(model, list2, pfirst, val2, NULL); + serd_model_add(model, list2, prest, list1, NULL); + + SerdBuffer buffer = {NULL, 0}; + SerdEnv* env = serd_env_new(SERD_EMPTY_STRING()); + SerdByteSink* out = serd_byte_sink_new_buffer(&buffer); + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, out); + + serd_env_set_prefix( + env, + SERD_STRING("rdf"), + SERD_STRING("http://www.w3.org/1999/02/22-rdf-syntax-ns#")); + + SerdCursor* all = serd_model_begin(model); + serd_describe_range(all, serd_writer_sink(writer), 0); + serd_cursor_free(all); + + serd_writer_finish(writer); + const char* str = serd_buffer_sink_finish(&buffer); + const char* expected = "\n" + " _:l1 .\n" + "\n" + "_:l1\n" + " rdf:first \"a\" ;\n" + " rdf:rest [\n" + " rdf:first \"b\" ;\n" + " rdf:rest _:l1\n" + " ] .\n"; + + assert(!strcmp(str, expected)); + + free(buffer.buf); + serd_writer_free(writer); + serd_byte_sink_free(out); + serd_model_free(model); + serd_env_free(env); + serd_nodes_free(nodes); + + return 0; +} + +typedef struct { + size_t n_written; + size_t max_successes; +} FailingWriteFuncState; + +/// Write function that fails after a certain number of writes +static size_t +failing_write_func(const void* buf, size_t size, size_t nmemb, void* stream) +{ + (void)buf; + (void)size; + (void)nmemb; + + FailingWriteFuncState* state = (FailingWriteFuncState*)stream; + + return (++state->n_written > state->max_successes) ? 0 : nmemb; +} + +static int +test_write_error_in_list_subject(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + serd_set_log_func(world, expected_error, NULL); + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + SerdNodes* nodes = serd_nodes_new(); + + serd_model_add_index(model, SERD_ORDER_OPS); + + /* const SerdNode* s = serd_nodes_uri(nodes, SERD_STRING("urn:s")); */ + const SerdNode* p = serd_nodes_uri(nodes, SERD_STRING("urn:p")); + const SerdNode* o = serd_nodes_uri(nodes, SERD_STRING("urn:o")); + const SerdNode* l1 = serd_nodes_blank(nodes, SERD_STRING("l1")); + + const SerdNode* one = serd_nodes_integer(nodes, 1, SERD_EMPTY_STRING()); + const SerdNode* l2 = serd_nodes_blank(nodes, SERD_STRING("l2")); + const SerdNode* two = serd_nodes_integer(nodes, 2, SERD_EMPTY_STRING()); + + const SerdNode* rdf_first = serd_nodes_uri(nodes, SERD_STRING(RDF_FIRST)); + const SerdNode* rdf_rest = serd_nodes_uri(nodes, SERD_STRING(RDF_REST)); + const SerdNode* rdf_nil = serd_nodes_uri(nodes, SERD_STRING(RDF_NIL)); + + serd_model_add(model, l1, rdf_first, one, NULL); + serd_model_add(model, l1, rdf_rest, l2, NULL); + serd_model_add(model, l2, rdf_first, two, NULL); + serd_model_add(model, l2, rdf_rest, rdf_nil, NULL); + serd_model_add(model, l1, p, o, NULL); + + SerdEnv* env = serd_env_new(SERD_EMPTY_STRING()); + + for (size_t max_successes = 0; max_successes < 18; ++max_successes) { + FailingWriteFuncState state = {0, max_successes}; + SerdByteSink* out = + serd_byte_sink_new_function(failing_write_func, &state, 1); + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, out); + + const SerdSink* const sink = serd_writer_sink(writer); + SerdCursor* const all = serd_model_begin(model); + const SerdStatus st = serd_describe_range(all, sink, 0); + serd_cursor_free(all); + + assert(st == SERD_ERR_BAD_WRITE); + + serd_writer_free(writer); + serd_byte_sink_free(out); + } + + serd_env_free(env); + serd_model_free(model); + serd_nodes_free(nodes); + return 0; +} + +static int +test_write_error_in_list_object(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + serd_set_log_func(world, expected_error, NULL); + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + SerdNodes* nodes = serd_nodes_new(); + + serd_model_add_index(model, SERD_ORDER_OPS); + + const SerdNode* s = serd_nodes_uri(nodes, SERD_STRING("urn:s")); + const SerdNode* p = serd_nodes_uri(nodes, SERD_STRING("urn:p")); + const SerdNode* l1 = serd_nodes_blank(nodes, SERD_STRING("l1")); + + const SerdNode* one = serd_nodes_integer(nodes, 1, SERD_EMPTY_STRING()); + const SerdNode* l2 = serd_nodes_blank(nodes, SERD_STRING("l2")); + const SerdNode* two = serd_nodes_integer(nodes, 2, SERD_EMPTY_STRING()); + + const SerdNode* rdf_first = serd_nodes_uri(nodes, SERD_STRING(RDF_FIRST)); + const SerdNode* rdf_rest = serd_nodes_uri(nodes, SERD_STRING(RDF_REST)); + const SerdNode* rdf_nil = serd_nodes_uri(nodes, SERD_STRING(RDF_NIL)); + + serd_model_add(model, s, p, l1, NULL); + serd_model_add(model, l1, rdf_first, one, NULL); + serd_model_add(model, l1, rdf_rest, l2, NULL); + serd_model_add(model, l2, rdf_first, two, NULL); + serd_model_add(model, l2, rdf_rest, rdf_nil, NULL); + + SerdEnv* env = serd_env_new(SERD_EMPTY_STRING()); + + for (size_t max_successes = 0; max_successes < 21; ++max_successes) { + FailingWriteFuncState state = {0, max_successes}; + SerdByteSink* out = + serd_byte_sink_new_function(failing_write_func, &state, 1); + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, out); + + const SerdSink* const sink = serd_writer_sink(writer); + SerdCursor* const all = serd_model_begin(model); + const SerdStatus st = serd_describe_range(all, sink, 0); + serd_cursor_free(all); + + assert(st == SERD_ERR_BAD_WRITE); + + serd_writer_free(writer); + serd_byte_sink_free(out); + } + + serd_env_free(env); + serd_model_free(model); + serd_nodes_free(nodes); + return 0; +} + +int +main(void) +{ + static const unsigned n_quads = 300; + + serd_model_free(NULL); // Shouldn't crash + + typedef int (*TestFunc)(SerdWorld*, unsigned); + + const TestFunc tests[] = {test_free_null, + test_get_world, + test_get_default_order, + test_get_flags, + test_all_begin, + test_begin_ordered, + test_add_with_iterator, + test_add_remove_nodes, + test_add_index, + test_remove_index, + test_inserter, + test_erase_with_iterator, + test_add_erase, + test_add_bad_statement, + test_add_with_caret, + test_erase_all, + test_clear, + test_copy, + test_equals, + test_find_past_end, + test_find_unknown_node, + test_find_graph, + test_range, + test_triple_index_read, + test_quad_index_read, + test_remove_graph, + test_default_graph, + test_write_flat_range, + test_write_bad_list, + test_write_infinite_list, + test_write_error_in_list_subject, + test_write_error_in_list_object, + NULL}; + + SerdWorld* world = serd_world_new(); + int ret = 0; + + for (const TestFunc* t = tests; *t; ++t) { + serd_set_log_func(world, NULL, NULL); + ret += (*t)(world, n_quads); + } + + serd_world_free(world); + return ret; +} diff --git a/test/test_string.c b/test/test_string.c index 1181de01..2bd47680 100644 --- a/test/test_string.c +++ b/test/test_string.c @@ -32,7 +32,7 @@ test_strerror(void) { const char* msg = serd_strerror(SERD_SUCCESS); assert(!strcmp(msg, "Success")); - for (int i = SERD_FAILURE; i <= SERD_ERR_BAD_URI; ++i) { + for (int i = SERD_FAILURE; i <= SERD_ERR_BAD_INDEX; ++i) { msg = serd_strerror((SerdStatus)i); assert(strcmp(msg, "Success")); } -- cgit v1.2.1