From b5956c4dc6b065d664908104d5fc6752a87e3364 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 31 Mar 2023 17:17:41 -0400 Subject: Add model and serd-sort utility With all the new functionality, the complexity of the serd-pipe command-line interface is starting to push the limits of available flags. So, instead of grafting on further options to control a model, this commit adds a new tool, serd-sort, which acts somewhat like a stripped-down serd-pipe that stores statements in a model in memory. This keeps the complexity (including the user-facing complexity) of any one tool down, since other more focused tools can be used for streaming tasks in a pipeline. In other words, abandon Swissarmyknifeism, take a page from the Unix philosophy, and try to expose the model functionality to the command-line in a dedicated focused tool. The model implementation is tested by using this tool to run a subset of the usual test suites, and a special suite to test statement sorting. --- include/serd/cursor.h | 83 +++++++++++++ include/serd/describe.h | 57 +++++++++ include/serd/inserter.h | 44 +++++++ include/serd/model.h | 325 ++++++++++++++++++++++++++++++++++++++++++++++++ include/serd/serd.h | 4 + include/serd/status.h | 2 + 6 files changed, 515 insertions(+) create mode 100644 include/serd/cursor.h create mode 100644 include/serd/describe.h create mode 100644 include/serd/inserter.h create mode 100644 include/serd/model.h (limited to 'include') diff --git a/include/serd/cursor.h b/include/serd/cursor.h new file mode 100644 index 00000000..eba88ca7 --- /dev/null +++ b/include/serd/cursor.h @@ -0,0 +1,83 @@ +// Copyright 2011-2022 David Robillard +// SPDX-License-Identifier: ISC + +#ifndef SERD_CURSOR_H +#define SERD_CURSOR_H + +#include "serd/attributes.h" +#include "serd/memory.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "zix/attributes.h" + +#include <stdbool.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_cursor Cursor + @ingroup serd_storage + @{ +*/ + 
+/** + A cursor that iterates over statements in a model. + + A cursor is a smart iterator that visits all statements that match a + pattern. +*/ +typedef struct SerdCursorImpl SerdCursor; + +/// Return a new copy of `cursor` +SERD_API SerdCursor* ZIX_ALLOCATED +serd_cursor_copy(SerdAllocator* ZIX_NULLABLE allocator, + const SerdCursor* ZIX_NULLABLE cursor); + +/// Return the statement pointed to by `cursor` +SERD_API const SerdStatement* ZIX_NULLABLE +serd_cursor_get(const SerdCursor* ZIX_NULLABLE cursor); + +/** + Increment cursor to point to the next statement. + + Null is treated like an end cursor. + + @return Failure if `cursor` was already at the end. +*/ +SERD_API SerdStatus +serd_cursor_advance(SerdCursor* ZIX_NULLABLE cursor); + +/** + Return true if the cursor has reached its end. + + Null is treated like an end cursor. +*/ +SERD_PURE_API bool +serd_cursor_is_end(const SerdCursor* ZIX_NULLABLE cursor); + +/** + Return true iff `lhs` equals `rhs`. + + Two cursors are equivalent if they point to the same statement in the same + index in the same model, or are both the end of the same model. Note that + two cursors can point to the same statement but not be equivalent, since + they may have reached the statement via different indices. + + Null is treated like an end cursor. 
+*/ +SERD_PURE_API bool +serd_cursor_equals(const SerdCursor* ZIX_NULLABLE lhs, + const SerdCursor* ZIX_NULLABLE rhs); + +/// Free `cursor` +SERD_API void +serd_cursor_free(SerdAllocator* ZIX_NULLABLE allocator, + SerdCursor* ZIX_NULLABLE cursor); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_CURSOR_H diff --git a/include/serd/describe.h b/include/serd/describe.h new file mode 100644 index 00000000..c571aeae --- /dev/null +++ b/include/serd/describe.h @@ -0,0 +1,57 @@ +// Copyright 2011-2023 David Robillard +// SPDX-License-Identifier: ISC + +#ifndef SERD_DESCRIBE_H +#define SERD_DESCRIBE_H + +#include "serd/attributes.h" +#include "serd/cursor.h" +#include "serd/memory.h" +#include "serd/sink.h" +#include "serd/status.h" +#include "zix/attributes.h" + +#include <stdint.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_range Range + @ingroup serd_storage + @{ +*/ + +/// Flags that control the style of a model description +typedef enum { + SERD_NO_TYPE_FIRST = 1U << 0U, ///< Disable writing rdf:type ("a") first +} SerdDescribeFlag; + +/// Bitwise OR of SerdDescribeFlag values +typedef uint32_t SerdDescribeFlags; + +/** + Describe a range of statements by writing to a sink. + + This will consume the given cursor, and emit at least every statement it + visits. More statements from the model may be written in order to describe + anonymous blank nodes that are associated with a subject in the range. + + The default is to write statements in an order suited for pretty-printing + with Turtle or TriG with as many anonymous nodes as possible. If + `SERD_NO_INLINE_OBJECTS` is given, a simple sorted stream is written + instead, which is faster since no searching is required, but can result in + ugly output for Turtle or TriG. 
+*/ +SERD_API SerdStatus +serd_describe_range(SerdAllocator* ZIX_NULLABLE allocator, + const SerdCursor* ZIX_NULLABLE range, + const SerdSink* ZIX_NONNULL sink, + SerdDescribeFlags flags); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_DESCRIBE_H diff --git a/include/serd/inserter.h b/include/serd/inserter.h new file mode 100644 index 00000000..482bff5b --- /dev/null +++ b/include/serd/inserter.h @@ -0,0 +1,44 @@ +// Copyright 2011-2022 David Robillard +// SPDX-License-Identifier: ISC + +#ifndef SERD_INSERTER_H +#define SERD_INSERTER_H + +#include "serd/attributes.h" +#include "serd/model.h" +#include "serd/node.h" +#include "serd/sink.h" +#include "zix/attributes.h" + +SERD_BEGIN_DECLS + +/** + @defgroup serd_inserter Inserter + @ingroup serd_storage + @{ +*/ + +/** + Create an inserter for writing statements to a model. + + Once created, an inserter is just a sink with no additional interface. + + @param model The model to insert received statements into. + + @param default_graph Optional default graph, which will be set on received + statements that have no graph. This allows, for example, loading a Turtle + document into an isolated graph in the model. + + @return A newly allocated sink which must be freed with serd_sink_free(). 
+*/ +SERD_API SerdSink* ZIX_ALLOCATED +serd_inserter_new(SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NULLABLE default_graph); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_INSERTER_H diff --git a/include/serd/model.h b/include/serd/model.h new file mode 100644 index 00000000..24055c9d --- /dev/null +++ b/include/serd/model.h @@ -0,0 +1,325 @@ +// Copyright 2011-2022 David Robillard +// SPDX-License-Identifier: ISC + +#ifndef SERD_MODEL_H +#define SERD_MODEL_H + +#include "serd/attributes.h" +#include "serd/caret.h" +#include "serd/cursor.h" +#include "serd/memory.h" +#include "serd/node.h" +#include "serd/nodes.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "serd/world.h" +#include "zix/attributes.h" + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_model Model + @ingroup serd_storage + @{ +*/ + +/// An indexed set of statements +typedef struct SerdModelImpl SerdModel; + +/** + Statement ordering. + + Statements themselves always have the same fields in the same order + (subject, predicate, object, graph), but a model can keep indices for + different orderings to provide good performance for different kinds of + queries. 
+*/ +typedef enum { + SERD_ORDER_SPO, ///< Subject, Predicate, Object + SERD_ORDER_SOP, ///< Subject, Object, Predicate + SERD_ORDER_OPS, ///< Object, Predicate, Subject + SERD_ORDER_OSP, ///< Object, Subject, Predicate + SERD_ORDER_PSO, ///< Predicate, Subject, Object + SERD_ORDER_POS, ///< Predicate, Object, Subject + SERD_ORDER_GSPO, ///< Graph, Subject, Predicate, Object + SERD_ORDER_GSOP, ///< Graph, Subject, Object, Predicate + SERD_ORDER_GOPS, ///< Graph, Object, Predicate, Subject + SERD_ORDER_GOSP, ///< Graph, Object, Subject, Predicate + SERD_ORDER_GPSO, ///< Graph, Predicate, Subject, Object + SERD_ORDER_GPOS, ///< Graph, Predicate, Object, Subject +} SerdStatementOrder; + +/// Flags that control model storage and indexing +typedef enum { + SERD_STORE_GRAPHS = 1U << 0U, ///< Store and index the graph of statements + SERD_STORE_CARETS = 1U << 1U, ///< Store original caret of statements +} SerdModelFlag; + +/// Bitwise OR of SerdModelFlag values +typedef uint32_t SerdModelFlags; + +/** + Create a new model. + + @param world The world in which to make this model. + + @param default_order The order for the default index, which is always + present and responsible for owning all the statements in the model. This + should almost always be #SERD_ORDER_SPO or #SERD_ORDER_GSPO (which + support writing pretty documents), but advanced applications that do not want + either of these indices can use a different order. Additional indices can + be added with serd_model_add_index(). + + @param flags Options that control what data is stored in the model. 
+*/ +SERD_API SerdModel* ZIX_ALLOCATED +serd_model_new(SerdWorld* ZIX_NONNULL world, + SerdStatementOrder default_order, + SerdModelFlags flags); + +/// Return a deep copy of `model` +SERD_API SerdModel* ZIX_ALLOCATED +serd_model_copy(SerdAllocator* ZIX_NULLABLE allocator, + const SerdModel* ZIX_NONNULL model); + +/// Return true iff `a` is equal to `b`, ignoring statement cursor metadata +SERD_API bool +serd_model_equals(const SerdModel* ZIX_NULLABLE a, + const SerdModel* ZIX_NULLABLE b); + +/// Close and free `model` +SERD_API void +serd_model_free(SerdModel* ZIX_NULLABLE model); + +/** + Add an index for a particular statement order to the model. + + @return Failure if this index already exists. +*/ +SERD_API SerdStatus +serd_model_add_index(SerdModel* ZIX_NONNULL model, SerdStatementOrder order); + +/** + Remove the index for a particular statement order from the model. + + @return Failure if this index does not exist. +*/ +SERD_API SerdStatus +serd_model_drop_index(SerdModel* ZIX_NONNULL model, SerdStatementOrder order); + +/// Get the world associated with `model` +SERD_PURE_API SerdWorld* ZIX_NONNULL +serd_model_world(SerdModel* ZIX_NONNULL model); + +/// Get all nodes interned in `model` +SERD_PURE_API const SerdNodes* ZIX_NONNULL +serd_model_nodes(const SerdModel* ZIX_NONNULL model); + +/// Get the default statement order of `model` +SERD_PURE_API SerdStatementOrder +serd_model_default_order(const SerdModel* ZIX_NONNULL model); + +/// Get the flags enabled on `model` +SERD_PURE_API SerdModelFlags +serd_model_flags(const SerdModel* ZIX_NONNULL model); + +/// Return the number of statements stored in `model` +SERD_PURE_API size_t +serd_model_size(const SerdModel* ZIX_NONNULL model); + +/// Return true iff there are no statements stored in `model` +SERD_PURE_API bool +serd_model_empty(const SerdModel* ZIX_NONNULL model); + +/** + Return a cursor at the start of every statement in the model. 
+ + The returned cursor will advance over every statement in the model's default + order. + + @param allocator The allocator used for the returned cursor. + @param model The model that the returned cursor points to. +*/ +SERD_API SerdCursor* ZIX_ALLOCATED +serd_model_begin(SerdAllocator* ZIX_NULLABLE allocator, + const SerdModel* ZIX_NONNULL model); + +/** + Return a cursor past the end of the model. + + This returns the "universal" end cursor, which is equivalent to any cursor + for this model that has reached its end. +*/ +SERD_CONST_API const SerdCursor* ZIX_NONNULL +serd_model_end(const SerdModel* ZIX_NONNULL model); + +/** + Return a cursor over all statements in the model in a specific order. + + @param allocator The allocator used for the returned cursor. + @param model The model that the returned cursor points to. + @param order The statement order that the returned cursor advances through. +*/ +SERD_API SerdCursor* ZIX_ALLOCATED +serd_model_begin_ordered(SerdAllocator* ZIX_NULLABLE allocator, + const SerdModel* ZIX_NONNULL model, + SerdStatementOrder order); + +/** + Search for statements that match a pattern. + + @param allocator The allocator used for the returned cursor. + @param model The model to search in. + @param s The subject to match, or null. + @param p The predicate to match, or null. + @param o The object to match, or null. + @param g The graph to match, or null. + @return A cursor pointing at the first match, or the end. +*/ +SERD_API SerdCursor* ZIX_NULLABLE +serd_model_find(SerdAllocator* ZIX_NULLABLE allocator, + const SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NULLABLE s, + const SerdNode* ZIX_NULLABLE p, + const SerdNode* ZIX_NULLABLE o, + const SerdNode* ZIX_NULLABLE g); + +/** + Search for a single node that matches a pattern. + + Exactly one of `s`, `p`, `o` must be NULL. + This function is mainly useful for predicates that only have one value. + + @return The first matching node, or NULL if no matches are found. 
+*/ +SERD_API const SerdNode* ZIX_NULLABLE +serd_model_get(const SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NULLABLE s, + const SerdNode* ZIX_NULLABLE p, + const SerdNode* ZIX_NULLABLE o, + const SerdNode* ZIX_NULLABLE g); + +/** + Search for a single statement that matches a pattern. + + This function is mainly useful for predicates that only have one value. + + @return The first matching statement, or NULL if none are found. +*/ +SERD_API const SerdStatement* ZIX_NULLABLE +serd_model_get_statement(const SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NULLABLE s, + const SerdNode* ZIX_NULLABLE p, + const SerdNode* ZIX_NULLABLE o, + const SerdNode* ZIX_NULLABLE g); + +/// Return true iff a statement exists +SERD_API bool +serd_model_ask(const SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NULLABLE s, + const SerdNode* ZIX_NULLABLE p, + const SerdNode* ZIX_NULLABLE o, + const SerdNode* ZIX_NULLABLE g); + +/// Return the number of matching statements +SERD_API size_t +serd_model_count(const SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NULLABLE s, + const SerdNode* ZIX_NULLABLE p, + const SerdNode* ZIX_NULLABLE o, + const SerdNode* ZIX_NULLABLE g); + +/** + Add a statement to a model from nodes. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API SerdStatus +serd_model_add(SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NONNULL s, + const SerdNode* ZIX_NONNULL p, + const SerdNode* ZIX_NONNULL o, + const SerdNode* ZIX_NULLABLE g); + +/** + Add a statement to a model from nodes with a document origin. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API SerdStatus +serd_model_add_with_caret(SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NONNULL s, + const SerdNode* ZIX_NONNULL p, + const SerdNode* ZIX_NONNULL o, + const SerdNode* ZIX_NULLABLE g, + const SerdCaret* ZIX_NULLABLE caret); + +/** + Add a statement to a model. 
+ + This function fails if there are any active iterators on `model`. + If statement is null, then SERD_FAILURE is returned. +*/ +SERD_API SerdStatus +serd_model_insert(SerdModel* ZIX_NONNULL model, + const SerdStatement* ZIX_NONNULL statement); + +/** + Add a range of statements to a model. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API SerdStatus +serd_model_insert_statements(SerdModel* ZIX_NONNULL model, + SerdCursor* ZIX_NONNULL range); + +/** + Remove a statement from a model via a cursor. + + Calling this function invalidates all other iterators on this model. + + @param model The model which `cursor` points to. + + @param cursor Cursor pointing to the element to erase. This cursor is + advanced to the next statement on return. +*/ +SERD_API SerdStatus +serd_model_erase(SerdModel* ZIX_NONNULL model, SerdCursor* ZIX_NONNULL cursor); + +/** + Remove a range of statements from a model. + + This can be used with serd_model_find() to erase all statements in a model + that match a pattern. + + Calling this function invalidates all iterators on `model`. + + @param model The model which `range` points to. + + @param range Range to erase, which will be empty on return. +*/ +SERD_API SerdStatus +serd_model_erase_statements(SerdModel* ZIX_NONNULL model, + SerdCursor* ZIX_NONNULL range); + +/** + Remove everything from a model. + + Calling this function invalidates all iterators on `model`. + + @param model The model to clear. 
+*/ +SERD_API SerdStatus +serd_model_clear(SerdModel* ZIX_NONNULL model); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_MODEL_H diff --git a/include/serd/serd.h b/include/serd/serd.h index 77d1abf8..5d208d97 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -94,6 +94,10 @@ @{ */ +#include "serd/cursor.h" +#include "serd/describe.h" +#include "serd/inserter.h" +#include "serd/model.h" #include "serd/nodes.h" /** diff --git a/include/serd/status.h b/include/serd/status.h index 5aedd5a6..90c8d83c 100644 --- a/include/serd/status.h +++ b/include/serd/status.h @@ -40,6 +40,8 @@ typedef enum { SERD_BAD_DATA, ///< Invalid data SERD_BAD_LITERAL, ///< Invalid literal SERD_BAD_PATTERN, ///< Invalid statement pattern + SERD_BAD_CURSOR, ///< Use of invalidated cursor + SERD_BAD_INDEX, ///< No suitable model index available } SerdStatus; /// Return a string describing a status code -- cgit v1.2.1