diff options
53 files changed, 4886 insertions, 43 deletions
diff --git a/.reuse/dep5 b/.reuse/dep5 index 26922e03..df791849 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -8,7 +8,7 @@ Copyright: 2010 World Wide Web Consortium, (MIT, ERCIM, Keio, Beihang) and other Comment: Standard test suites from the W3C License: BSD-3-Clause -Files: test/extra/* test/multifile/* +Files: test/extra/* test/multifile/* test/sort/* Copyright: 2011-2023 David Robillard <d@drobilla.net> Comment: Extra test suites for serd License: BSD-3-Clause OR ISC @@ -4,6 +4,7 @@ serd (1.1.1) unstable; urgency=medium * Add SerdWorld for shared library state * Add extensible logging API * Add filtering of statements by a triple or quad pattern + * Add model for storing statements in memory * Add support for converting literals to canonical form * Add support for parsing variables * Add support for writing terse output with minimal newlines diff --git a/doc/conf.py.in b/doc/conf.py.in index 70efec61..2ae43555 100644 --- a/doc/conf.py.in +++ b/doc/conf.py.in @@ -29,7 +29,9 @@ _opaque = [ "FILE", "SerdAllocatorImpl", "SerdCaretImpl", + "SerdCursorImpl", "SerdEnvImpl", + "SerdModelImpl", "SerdNodeImpl", "SerdNodesImpl", "SerdReaderImpl", diff --git a/doc/man/meson.build b/doc/man/meson.build index 575d3d71..b9ce66a7 100644 --- a/doc/man/meson.build +++ b/doc/man/meson.build @@ -23,6 +23,7 @@ if not get_option('tools').disabled() install_man(files('serd-filter.1')) install_man(files('serd-pipe.1')) + install_man(files('serd-sort.1')) endif # Build/install HTML man pages if mandoc is present @@ -47,6 +48,7 @@ if not get_option('tools').disabled() page_names = [ 'serd-filter', 'serd-pipe', + 'serd-sort', ] html_mandir = docdir / versioned_name / 'man' diff --git a/doc/man/serd-filter.1 b/doc/man/serd-filter.1 index 634d5f3b..ff076b97 100644 --- a/doc/man/serd-filter.1 +++ b/doc/man/serd-filter.1 @@ -132,6 +132,8 @@ To print every statement about http://example.org/subject: .It .Xr serd-pipe 1 .It +.Xr serd-sort 1 +.It .Lk http://drobilla.net/software/serd/ .El .Sh STANDARDS diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1 index 2d6534ae..a2ce86eb 100644 --- a/doc/man/serd-pipe.1 +++ b/doc/man/serd-pipe.1 @@ -8,13 +8,14 @@ .Nd read and write RDF data .Sh SYNOPSIS .Nm serd-pipe -.Op Fl CVhq +.Op Fl CVh .Op Fl B Ar base .Op Fl I Ar syntax .Op Fl O Ar syntax .Op Fl R Ar root .Op Fl b Ar bytes .Op Fl k Ar bytes +.Op Fl l Ar level .Op Fl o Ar filename .Op Fl s Ar string .Op Ar input ... @@ -224,12 +225,31 @@ Parsing is performed using a pre-allocated stack for performance and security re By default, the stack is 1 MiB, which should be sufficient for most data. This can be increased to support unusually structured data and huge literals, or decreased to reduce overall memory requirements and reduce startup time. +.It Fl l Ar level +Maximum log level, or (equivalently) minimum log priority. +Only messages with at least the priority of this level will be displayed. +The +.Ar level +is as in +.Xr syslog 2 , +either a number from +.Cm 0 +to +.Cm 7, +or +.Cm emerg , +.Cm alert , +.Cm crit , +.Cm err , +.Cm warn , +.Cm note , +.Cm info , +or +.Cm debug . .It Fl o Ar filename Write output to the given .Ar filename instead of stdout. -.It Fl q -Suppress all output except data. .It Fl s Ar string Parse .Ar string @@ -281,6 +301,8 @@ exits with a status of 0, or non-zero if an error occurred. .It .Xr serd-filter 1 .It +.Xr serd-sort 1 +.It .Lk http://drobilla.net/software/serd/ .It .Lk http://gitlab.com/drobilla/serd/ diff --git a/doc/man/serd-sort.1 b/doc/man/serd-sort.1 new file mode 100644 index 00000000..1484d67e --- /dev/null +++ b/doc/man/serd-sort.1 @@ -0,0 +1,186 @@ +.\" # Copyright 2021-2022 David Robillard <d@drobilla.net> +.\" # SPDX-License-Identifier: ISC +.Dd July 15, 2022 +.Dt SERD-SORT 1 +.Os Serd +.Sh NAME +.Nm serd-sort +.Nd reorder RDF statements +.Sh SYNOPSIS +.Nm serd-sort +.Op Fl htV +.Op Fl B Ar base +.Op Fl I Ar syntax +.Op Fl O Ar syntax +.Op Fl b Ar bytes +.Op Fl c Ar collation +.Op Fl k Ar bytes +.Op Fl o Ar filename +.Op Ar input ... +.Sh DESCRIPTION +.Nm +reorders statements in RDF data by loading everything into memory then rewriting it. +By default, +a +.Dq pretty +ordering is used which is ideal for pretty-printing Turtle or TriG. +The +.Fl c +option can be used to request a specific ordering, +which is mainly useful when emitting a line-based syntax like NTriples or NQuads in a pipeline. +.Pp +Input and output arguments work the same way as with +.Xr serd-pipe 1 . +.Pp +The options are as follows: +.Pp +.Bl -tag -compact -width 3n +.It Fl B Ar base +Base URI, path, or +.Cm rebase +to use the output path. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl I Ar syntax +Input syntax or option: +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +.Cm lax , +.Cm variables , +.Cm relative , +or +.Cm labels . +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl O Ar syntax +Output syntax or option: +.Cm empty , +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +.Cm ascii , +.Cm expanded , +.Cm verbatim , +.Cm terse , +or +.Cm lax . +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl V +Display version information and exit. +.Pp +.It Fl b Ar bytes +I/O block size. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl c Ar collation +A specific collation (statement ordering) to use. +This can be any ordering of the characters +.Dq SPO , +which stand for the subject, predicate, and object of statements. +Optionally, +.Dq G +can be added as the first character, +which will sort graph-first. +Concretely, the valid values are: +.Cm SPO , +.Cm SOP , +.Cm OPS , +.Cm OSP , +.Cm PSO , +.Cm POS , +.Cm GSPO , +.Cm GSOP , +.Cm GOPS , +.Cm GOSP , +.Cm GPSO , +and +.Cm GPOS . +.Pp +.It Fl h +Print the command line options. +.Pp +.It Fl k Ar bytes +Parser stack size. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl o Ar filename +Write output to the given +.Ar filename +instead of stdout. +.Pp +.It Fl t +Do not write type as +.Dq a +before other properties. +Instead, rdf:type will be written in order like any other property. +.El +.Sh EXIT STATUS +.Nm +exits with a status of 0, or non-zero if an error occured. +.Sh EXAMPLES +To pretty-print a file: +.Pp +.Dl $ serd-sort -o pretty.ttl input.ttl +.Pp +To print statements ordered by predicate, subject, then object: +.Pp +.Dl $ serd-sort -c PSO input.ttl +.Sh SEE ALSO +.Bl -item -compact +.It +.Xr serd-pipe 1 +.It +.Xr serd-filter 1 +.It +.Lk http://drobilla.net/software/serd/ +.El +.Sh STANDARDS +.Bl -item -compact +.It +.Rs +.%A W3C +.%T RDF 1.1 NQuads +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/n-quads/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 NTriples +.Re +.Lk https://www.w3.org/TR/n-triples/ +.It +.Rs +.%A W3C +.%T RDF 1.1 TriG +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/trig/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 Turtle +.Re +.Lk https://www.w3.org/TR/turtle/ +.El +.Sh AUTHORS +.Nm +is a part of serd, by +.An David Robillard +.Mt d@drobilla.net . diff --git a/include/serd/cursor.h b/include/serd/cursor.h new file mode 100644 index 00000000..eba88ca7 --- /dev/null +++ b/include/serd/cursor.h @@ -0,0 +1,83 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_CURSOR_H +#define SERD_CURSOR_H + +#include "serd/attributes.h" +#include "serd/memory.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "zix/attributes.h" + +#include <stdbool.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_cursor Cursor + @ingroup serd_storage + @{ +*/ + +/** + A cursor that iterates over statements in a model. + + A cursor is a smart iterator that visits all statements that match a + pattern. +*/ +typedef struct SerdCursorImpl SerdCursor; + +/// Return a new copy of `cursor` +SERD_API SerdCursor* ZIX_ALLOCATED +serd_cursor_copy(SerdAllocator* ZIX_NULLABLE allocator, + const SerdCursor* ZIX_NULLABLE cursor); + +/// Return the statement pointed to by `cursor` +SERD_API const SerdStatement* ZIX_NULLABLE +serd_cursor_get(const SerdCursor* ZIX_NULLABLE cursor); + +/** + Increment cursor to point to the next statement. + + Null is treated like an end cursor. + + @return Failure if `cursor` was already at the end. +*/ +SERD_API SerdStatus +serd_cursor_advance(SerdCursor* ZIX_NULLABLE cursor); + +/** + Return true if the cursor has reached its end. + + Null is treated like an end cursor. +*/ +SERD_PURE_API bool +serd_cursor_is_end(const SerdCursor* ZIX_NULLABLE cursor); + +/** + Return true iff `lhs` equals `rhs`. + + Two cursors are equivalent if they point to the same statement in the same + index in the same model, or are both the end of the same model. Note that + two cursors can point to the same statement but not be equivalent, since + they may have reached the statement via different indices. + + Null is treated like an end cursor. +*/ +SERD_PURE_API bool +serd_cursor_equals(const SerdCursor* ZIX_NULLABLE lhs, + const SerdCursor* ZIX_NULLABLE rhs); + +/// Free `cursor` +SERD_API void +serd_cursor_free(SerdAllocator* ZIX_NULLABLE allocator, + SerdCursor* ZIX_NULLABLE cursor); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_CURSOR_H diff --git a/include/serd/describe.h b/include/serd/describe.h new file mode 100644 index 00000000..c571aeae --- /dev/null +++ b/include/serd/describe.h @@ -0,0 +1,57 @@ +// Copyright 2011-2023 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_DESCRIBE_H +#define SERD_DESCRIBE_H + +#include "serd/attributes.h" +#include "serd/cursor.h" +#include "serd/memory.h" +#include "serd/sink.h" +#include "serd/status.h" +#include "zix/attributes.h" + +#include <stdint.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_range Range + @ingroup serd_storage + @{ +*/ + +/// Flags that control the style of a model description +typedef enum { + SERD_NO_TYPE_FIRST = 1U << 0U, ///< Disable writing rdf:type ("a") first +} SerdDescribeFlag; + +/// Bitwise OR of SerdDescribeFlag values +typedef uint32_t SerdDescribeFlags; + +/** + Describe a range of statements by writing to a sink. + + This will consume the given cursor, and emit at least every statement it + visits. More statements from the model may be written in order to describe + anonymous blank nodes that are associated with a subject in the range. + + The default is to write statements in an order suited for pretty-printing + with Turtle or TriG with as many anonymous nodes as possible. If + `SERD_NO_INLINE_OBJECTS` is given, a simple sorted stream is written + instead, which is faster since no searching is required, but can result in + ugly output for Turtle or Trig. +*/ +SERD_API SerdStatus +serd_describe_range(SerdAllocator* ZIX_NULLABLE allocator, + const SerdCursor* ZIX_NULLABLE range, + const SerdSink* ZIX_NONNULL sink, + SerdDescribeFlags flags); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_DESCRIBE_H diff --git a/include/serd/inserter.h b/include/serd/inserter.h new file mode 100644 index 00000000..482bff5b --- /dev/null +++ b/include/serd/inserter.h @@ -0,0 +1,44 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_INSERTER_H +#define SERD_INSERTER_H + +#include "serd/attributes.h" +#include "serd/model.h" +#include "serd/node.h" +#include "serd/sink.h" +#include "zix/attributes.h" + +SERD_BEGIN_DECLS + +/** + @defgroup serd_inserter Inserter + @ingroup serd_storage + @{ +*/ + +/** + Create an inserter for writing statements to a model. + + Once created, an inserter is just a sink with no additional interface. + + @param model The model to insert received statements into. + + @param default_graph Optional default graph, which will be set on received + statements that have no graph. This allows, for example, loading a Turtle + document into an isolated graph in the model. + + @return A newly allocated sink which must be freed with serd_sink_free(). +*/ +SERD_API SerdSink* ZIX_ALLOCATED +serd_inserter_new(SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NULLABLE default_graph); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_INSERTER_H diff --git a/include/serd/model.h b/include/serd/model.h new file mode 100644 index 00000000..24055c9d --- /dev/null +++ b/include/serd/model.h @@ -0,0 +1,325 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_MODEL_H +#define SERD_MODEL_H + +#include "serd/attributes.h" +#include "serd/caret.h" +#include "serd/cursor.h" +#include "serd/memory.h" +#include "serd/node.h" +#include "serd/nodes.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "serd/world.h" +#include "zix/attributes.h" + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_model Model + @ingroup serd_storage + @{ +*/ + +/// An indexed set of statements +typedef struct SerdModelImpl SerdModel; + +/** + Statement ordering. + + Statements themselves always have the same fields in the same order + (subject, predicate, object, graph), but a model can keep indices for + different orderings to provide good performance for different kinds of + queries. +*/ +typedef enum { + SERD_ORDER_SPO, ///< Subject, Predicate, Object + SERD_ORDER_SOP, ///< Subject, Object, Predicate + SERD_ORDER_OPS, ///< Object, Predicate, Subject + SERD_ORDER_OSP, ///< Object, Subject, Predicate + SERD_ORDER_PSO, ///< Predicate, Subject, Object + SERD_ORDER_POS, ///< Predicate, Object, Subject + SERD_ORDER_GSPO, ///< Graph, Subject, Predicate, Object + SERD_ORDER_GSOP, ///< Graph, Subject, Object, Predicate + SERD_ORDER_GOPS, ///< Graph, Object, Predicate, Subject + SERD_ORDER_GOSP, ///< Graph, Object, Subject, Predicate + SERD_ORDER_GPSO, ///< Graph, Predicate, Subject, Object + SERD_ORDER_GPOS, ///< Graph, Predicate, Object, Subject +} SerdStatementOrder; + +/// Flags that control model storage and indexing +typedef enum { + SERD_STORE_GRAPHS = 1U << 0U, ///< Store and index the graph of statements + SERD_STORE_CARETS = 1U << 1U, ///< Store original caret of statements +} SerdModelFlag; + +/// Bitwise OR of SerdModelFlag values +typedef uint32_t SerdModelFlags; + +/** + Create a new model. + + @param world The world in which to make this model. + + @param default_order The order for the default index, which is always + present and responsible for owning all the statements in the model. This + should almost always be #SERD_ORDER_SPO or #SERD_ORDER_GSPO (which + support writing pretty documents), but advanced applications that do not want + either of these indices can use a different order. Additional indices can + be added with serd_model_add_index(). + + @param flags Options that control what data is stored in the model. +*/ +SERD_API SerdModel* ZIX_ALLOCATED +serd_model_new(SerdWorld* ZIX_NONNULL world, + SerdStatementOrder default_order, + SerdModelFlags flags); + +/// Return a deep copy of `model` +SERD_API SerdModel* ZIX_ALLOCATED +serd_model_copy(SerdAllocator* ZIX_NULLABLE allocator, + const SerdModel* ZIX_NONNULL model); + +/// Return true iff `a` is equal to `b`, ignoring statement cursor metadata +SERD_API bool +serd_model_equals(const SerdModel* ZIX_NULLABLE a, + const SerdModel* ZIX_NULLABLE b); + +/// Close and free `model` +SERD_API void +serd_model_free(SerdModel* ZIX_NULLABLE model); + +/** + Add an index for a particular statement order to the model. + + @return Failure if this index already exists. +*/ +SERD_API SerdStatus +serd_model_add_index(SerdModel* ZIX_NONNULL model, SerdStatementOrder order); + +/** + Add an index for a particular statement order to the model. + + @return Failure if this index does not exist. +*/ +SERD_API SerdStatus +serd_model_drop_index(SerdModel* ZIX_NONNULL model, SerdStatementOrder order); + +/// Get the world associated with `model` +SERD_PURE_API SerdWorld* ZIX_NONNULL +serd_model_world(SerdModel* ZIX_NONNULL model); + +/// Get all nodes interned in `model` +SERD_PURE_API const SerdNodes* ZIX_NONNULL +serd_model_nodes(const SerdModel* ZIX_NONNULL model); + +/// Get the default statement order of `model` +SERD_PURE_API SerdStatementOrder +serd_model_default_order(const SerdModel* ZIX_NONNULL model); + +/// Get the flags enabled on `model` +SERD_PURE_API SerdModelFlags +serd_model_flags(const SerdModel* ZIX_NONNULL model); + +/// Return the number of statements stored in `model` +SERD_PURE_API size_t +serd_model_size(const SerdModel* ZIX_NONNULL model); + +/// Return true iff there are no statements stored in `model` +SERD_PURE_API bool +serd_model_empty(const SerdModel* ZIX_NONNULL model); + +/** + Return a cursor at the start of every statement in the model. + + The returned cursor will advance over every statement in the model's default + order. + + @param allocator The allocator used for the returned cursor. + @param model The model that the returned cursor points to. +*/ +SERD_API SerdCursor* ZIX_ALLOCATED +serd_model_begin(SerdAllocator* ZIX_NULLABLE allocator, + const SerdModel* ZIX_NONNULL model); + +/** + Return a cursor past the end of the model. + + This returns the "universal" end cursor, which is equivalent to any cursor + for this model that has reached its end. +*/ +SERD_CONST_API const SerdCursor* ZIX_NONNULL +serd_model_end(const SerdModel* ZIX_NONNULL model); + +/** + Return a cursor over all statements in the model in a specific order. + + @param allocator The allocator used for the returned cursor. + @param model The model that the returned cursor points to. + @param order The statement order that the returned cursor advances through. +*/ +SERD_API SerdCursor* ZIX_ALLOCATED +serd_model_begin_ordered(SerdAllocator* ZIX_NULLABLE allocator, + const SerdModel* ZIX_NONNULL model, + SerdStatementOrder order); + +/** + Search for statements that match a pattern. + + @param allocator The allocator used for the returned cursor. + @param model The model to search in. + @param s The subject to match, or null. + @param p The predicate to match, or null. + @param o The object to match, or null. + @param g The graph to match, or null. + @return A cursor pointing at the first match, or the end. +*/ +SERD_API SerdCursor* ZIX_NULLABLE +serd_model_find(SerdAllocator* ZIX_NULLABLE allocator, + const SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NULLABLE s, + const SerdNode* ZIX_NULLABLE p, + const SerdNode* ZIX_NULLABLE o, + const SerdNode* ZIX_NULLABLE g); + +/** + Search for a single node that matches a pattern. + + Exactly one of `s`, `p`, `o` must be NULL. + This function is mainly useful for predicates that only have one value. + + @return The first matching node, or NULL if no matches are found. +*/ +SERD_API const SerdNode* ZIX_NULLABLE +serd_model_get(const SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NULLABLE s, + const SerdNode* ZIX_NULLABLE p, + const SerdNode* ZIX_NULLABLE o, + const SerdNode* ZIX_NULLABLE g); + +/** + Search for a single statement that matches a pattern. + + This function is mainly useful for predicates that only have one value. + + @return The first matching statement, or NULL if none are found. +*/ +SERD_API const SerdStatement* ZIX_NULLABLE +serd_model_get_statement(const SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NULLABLE s, + const SerdNode* ZIX_NULLABLE p, + const SerdNode* ZIX_NULLABLE o, + const SerdNode* ZIX_NULLABLE g); + +/// Return true iff a statement exists +SERD_API bool +serd_model_ask(const SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NULLABLE s, + const SerdNode* ZIX_NULLABLE p, + const SerdNode* ZIX_NULLABLE o, + const SerdNode* ZIX_NULLABLE g); + +/// Return the number of matching statements +SERD_API size_t +serd_model_count(const SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NULLABLE s, + const SerdNode* ZIX_NULLABLE p, + const SerdNode* ZIX_NULLABLE o, + const SerdNode* ZIX_NULLABLE g); + +/** + Add a statement to a model from nodes. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API SerdStatus +serd_model_add(SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NONNULL s, + const SerdNode* ZIX_NONNULL p, + const SerdNode* ZIX_NONNULL o, + const SerdNode* ZIX_NULLABLE g); + +/** + Add a statement to a model from nodes with a document origin. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API SerdStatus +serd_model_add_with_caret(SerdModel* ZIX_NONNULL model, + const SerdNode* ZIX_NONNULL s, + const SerdNode* ZIX_NONNULL p, + const SerdNode* ZIX_NONNULL o, + const SerdNode* ZIX_NULLABLE g, + const SerdCaret* ZIX_NULLABLE caret); + +/** + Add a statement to a model. + + This function fails if there are any active iterators on `model`. + If statement is null, then SERD_FAILURE is returned. +*/ +SERD_API SerdStatus +serd_model_insert(SerdModel* ZIX_NONNULL model, + const SerdStatement* ZIX_NONNULL statement); + +/** + Add a range of statements to a model. + + This function fails if there are any active iterators on `model`. +*/ +SERD_API SerdStatus +serd_model_insert_statements(SerdModel* ZIX_NONNULL model, + SerdCursor* ZIX_NONNULL range); + +/** + Remove a statement from a model via an iterator. + + Calling this function invalidates all other iterators on this model. + + @param model The model which `iter` points to. + + @param cursor Cursor pointing to the element to erase. This cursor is + advanced to the next statement on return. +*/ +SERD_API SerdStatus +serd_model_erase(SerdModel* ZIX_NONNULL model, SerdCursor* ZIX_NONNULL cursor); + +/** + Remove a range of statements from a model. + + This can be used with serd_model_find() to erase all statements in a model + that match a pattern. + + Calling this function invalidates all iterators on `model`. + + @param model The model which `range` points to. + + @param range Range to erase, which will be empty on return. +*/ +SERD_API SerdStatus +serd_model_erase_statements(SerdModel* ZIX_NONNULL model, + SerdCursor* ZIX_NONNULL range); + +/** + Remove everything from a model. + + Calling this function invalidates all iterators on `model`. + + @param model The model to clear. +*/ +SERD_API SerdStatus +serd_model_clear(SerdModel* ZIX_NONNULL model); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_MODEL_H diff --git a/include/serd/serd.h b/include/serd/serd.h index 77d1abf8..5d208d97 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -94,6 +94,10 @@ @{ */ +#include "serd/cursor.h" +#include "serd/describe.h" +#include "serd/inserter.h" +#include "serd/model.h" #include "serd/nodes.h" /** diff --git a/include/serd/status.h b/include/serd/status.h index 5aedd5a6..90c8d83c 100644 --- a/include/serd/status.h +++ b/include/serd/status.h @@ -40,6 +40,8 @@ typedef enum { SERD_BAD_DATA, ///< Invalid data SERD_BAD_LITERAL, ///< Invalid literal SERD_BAD_PATTERN, ///< Invalid statement pattern + SERD_BAD_CURSOR, ///< Use of invalidated cursor + SERD_BAD_INDEX, ///< No suitable model index available } SerdStatus; /// Return a string describing a status code diff --git a/meson.build b/meson.build index 96fb60d1..37330570 100644 --- a/meson.build +++ b/meson.build @@ -131,12 +131,16 @@ c_headers = files( 'include/serd/buffer.h', 'include/serd/canon.h', 'include/serd/caret.h', + 'include/serd/cursor.h', + 'include/serd/describe.h', 'include/serd/env.h', 'include/serd/event.h', 'include/serd/filter.h', 'include/serd/input_stream.h', + 'include/serd/inserter.h', 'include/serd/log.h', 'include/serd/memory.h', + 'include/serd/model.h', 'include/serd/node.h', 'include/serd/node_syntax.h', 'include/serd/nodes.h', @@ -164,11 +168,16 @@ sources = files( 'src/byte_source.c', 'src/canon.c', 'src/caret.c', + 'src/compare.c', + 'src/cursor.c', + 'src/describe.c', 'src/env.c', 'src/filter.c', 'src/input_stream.c', + 'src/inserter.c', 'src/log.c', 'src/memory.c', + 'src/model.c', 'src/node.c', 'src/node_syntax.c', 'src/nodes.c', diff --git a/meson/suppressions/meson.build b/meson/suppressions/meson.build index 8e825cac..a2b8d415 100644 --- a/meson/suppressions/meson.build +++ b/meson/suppressions/meson.build @@ -83,6 +83,8 @@ if is_variable('cc') if warning_level == 'everything' c_suppressions += [ '/wd4061', # enumerator in switch is not explicitly handled + '/wd4114', # same type qualifier used more than once + '/wd4305', # truncation to smaller type '/wd4514', # unreferenced inline function has been removed '/wd4710', # function not inlined '/wd4711', # function selected for automatic inline expansion diff --git a/scripts/serd_bench.py b/scripts/serd_bench.py index db8c1c5b..35869ce6 100755 --- a/scripts/serd_bench.py +++ b/scripts/serd_bench.py @@ -76,8 +76,8 @@ def gen(sp2b_dir, n_min, n_max, step): def write_header(results, progs): "Write the header line for TSV output" - results.write("n") - for prog in progs: + results.write("n\tserd-pipe\tserd-sort") + for prog in progs[2:]: results.write("\t" + os.path.basename(prog.split()[0])) results.write("\n") @@ -191,13 +191,18 @@ def run(progs, n_min, n_max, step): cmd = "/usr/bin/time -v " + prog + " " + filename(n) with open(filename(n) + ".out", "w") as out: sys.stderr.write(cmd + "\n") - proc = subprocess.Popen( - cmd.split(), stdout=out, stderr=subprocess.PIPE + proc = subprocess.run( + cmd.split(), + check=True, + stdout=out, + stderr=subprocess.PIPE, ) - time, memory = parse_time(proc.communicate()[1].decode()) + time, memory = parse_time(proc.stderr.decode()) rows["time"] += ["%.07f" % time] - rows["throughput"] += ["%d" % (n / time)] + rows["throughput"] += ( + ["%d" % (n / time)] if time > 0.0 else ["0"] + ) rows["memory"] += [str(memory)] # Write rows to output files @@ -272,7 +277,11 @@ example: args = ap.parse_args(sys.argv[1:]) serd_opts = "-I turtle -I verbatim -O turtle -O verbatim -O expanded" - progs = ["tools/serd-pipe " + serd_opts] + args.run + progs = [ + "tools/serd-pipe " + serd_opts, + "tools/serd-sort " + serd_opts, + ] + args.run + min_n = int(args.max / args.steps) max_n = args.max step = min_n diff --git a/src/compare.c b/src/compare.c new file mode 100644 index 00000000..2ce49cc2 --- /dev/null +++ b/src/compare.c @@ -0,0 +1,147 @@ +// Copyright 2011-2021 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#include "compare.h" + +#include "statement.h" // IWYU pragma: keep + +#include "serd/node.h" +#include "serd/statement.h" +#include "zix/attributes.h" + +#include <assert.h> + +/// Compare a mandatory node with a node pattern +ZIX_PURE_FUNC static inline int +serd_node_wildcard_compare(const SerdNode* ZIX_NONNULL a, + const SerdNode* ZIX_NULLABLE b) +{ + assert(a); + return b ? serd_node_compare(a, b) : 0; +} + +/// Compare an optional graph node with a node pattern +static inline int +serd_node_graph_compare(const SerdNode* ZIX_NULLABLE a, + const SerdNode* ZIX_NULLABLE b) +{ + if (a == b) { + return 0; + } + + if (!a) { + return -1; + } + + if (!b) { + return 1; + } + + return serd_node_compare(a, b); +} + +/// Compare statements lexicographically, ignoring graph +int +serd_triple_compare(const void* const x, + const void* const y, + const void* const user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (unsigned i = 0U; i < 3U; ++i) { + const int o = ordering[i]; + assert(o < 3); + + const int c = serd_node_compare(s->nodes[o], t->nodes[o]); + if (c) { + return c; + } + } + + return 0; +} + +/** + Compare statments with statement patterns lexicographically, ignoring graph. + + Null nodes in the second argument are treated as wildcards, always less than + any node. +*/ +int +serd_triple_compare_pattern(const void* const x, + const void* const y, + const void* const user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (unsigned i = 0U; i < 3U; ++i) { + const int o = ordering[i]; + assert(o < 3); + + const int c = serd_node_wildcard_compare(s->nodes[o], t->nodes[o]); + if (c) { + return c; + } + } + + return 0; +} + +/// Compare statements lexicographically +int +serd_quad_compare(const void* const x, + const void* const y, + const void* const user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (unsigned i = 0U; i < 4U; ++i) { + const int o = ordering[i]; + const int c = + (o == SERD_GRAPH) + ? serd_node_graph_compare(s->nodes[SERD_GRAPH], t->nodes[SERD_GRAPH]) + : serd_node_compare(s->nodes[o], t->nodes[o]); + + if (c) { + return c; + } + } + + return 0; +} + +/** + Compare statments with statement patterns lexicographically. + + Null nodes in the second argument are treated as wildcards, always less than + any node. +*/ +int +serd_quad_compare_pattern(const void* const x, + const void* const y, + const void* const user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (unsigned i = 0U; i < 4U; ++i) { + const int o = ordering[i]; + const int c = + (o == SERD_GRAPH) + ? serd_node_graph_compare(s->nodes[SERD_GRAPH], t->nodes[SERD_GRAPH]) + : serd_node_wildcard_compare(s->nodes[o], t->nodes[o]); + + if (c) { + return c; + } + } + + return 0; +} diff --git a/src/compare.h b/src/compare.h new file mode 100644 index 00000000..f4dbc046 --- /dev/null +++ b/src/compare.h @@ -0,0 +1,37 @@ +// Copyright 2011-2021 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_SRC_COMPARE_H +#define SERD_SRC_COMPARE_H + +#include "zix/attributes.h" + +/// Compare statements lexicographically, ignoring graph +ZIX_PURE_FUNC int +serd_triple_compare(const void* x, const void* y, const void* user_data); + +/** + Compare statments with statement patterns lexicographically, ignoring graph. + + Null nodes in the second argument are treated as wildcards, always less than + any node. +*/ +ZIX_PURE_FUNC int +serd_triple_compare_pattern(const void* x, + const void* y, + const void* user_data); + +/// Compare statements lexicographically +ZIX_PURE_FUNC int +serd_quad_compare(const void* x, const void* y, const void* user_data); + +/** + Compare statments with statement patterns lexicographically. + + Null nodes in the second argument are treated as wildcards, always less than + any node. +*/ +ZIX_PURE_FUNC int +serd_quad_compare_pattern(const void* x, const void* y, const void* user_data); + +#endif // ZIX_SRC_COMPARE_H diff --git a/src/cursor.c b/src/cursor.c new file mode 100644 index 00000000..d6a86d19 --- /dev/null +++ b/src/cursor.c @@ -0,0 +1,224 @@ +// Copyright 2011-2020 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#include "cursor.h" + +#include "memory.h" +#include "model.h" +#include "node.h" +#include "statement.h" + +#include "serd/log.h" +#include "serd/memory.h" +#include "serd/statement.h" +#include "zix/attributes.h" +#include "zix/btree.h" +#include "zix/status.h" + +#include <assert.h> +#include <stdbool.h> +#include <string.h> + +static inline bool +statement_matches_quad(const SerdStatement* const statement, + const SerdNode* const quad[4]) +{ + return serd_statement_matches(statement, quad[0], quad[1], quad[2], quad[3]); +} + +ZIX_PURE_FUNC bool +serd_iter_in_range(const ZixBTreeIter iter, + const SerdNode* const pattern[4], + const ScanStrategy strategy) +{ + const SerdStatement* const statement = + (const SerdStatement*)zix_btree_get(iter); + + for (unsigned i = 0U; i < strategy.n_prefix; ++i) { + const unsigned field = orderings[strategy.order][i]; + if (!serd_node_pattern_match(statement->nodes[field], pattern[field])) { + return false; + } + } + + return true; +} + +static bool +serd_cursor_in_range(const SerdCursor* const cursor) +{ + return (cursor->strategy.mode == FILTER_EVERYTHING || + serd_iter_in_range(cursor->iter, cursor->pattern, cursor->strategy)); +} + +/// Seek forward as necessary until the cursor points to a matching statement +static SerdStatus +serd_cursor_seek_match(SerdCursor* const cursor) +{ + assert(cursor->strategy.mode == FILTER_EVERYTHING || + cursor->strategy.mode == FILTER_RANGE); + + for (; !zix_btree_iter_is_end(cursor->iter); + zix_btree_iter_increment(&cursor->iter)) { + if (!serd_cursor_in_range(cursor)) { + // Went past the end of the matching range, reset to end + cursor->iter = zix_btree_end_iter; + return SERD_FAILURE; + } + + const SerdStatement* s = (const SerdStatement*)zix_btree_get(cursor->iter); + if (statement_matches_quad(s, cursor->pattern)) { + break; // Found matching statement + } + } + + return SERD_SUCCESS; +} + +static bool +check_version(const SerdCursor* const cursor) +{ + if (cursor->version != cursor->model->version) { + serd_logf(cursor->model->world, + SERD_LOG_LEVEL_ERROR, + "attempt to use invalidated cursor"); + return false; + } + + return true; +} + +SerdCursor +serd_cursor_make(const SerdModel* const model, + const ZixBTreeIter iter, + const SerdNode* const pattern[4], + const ScanStrategy strategy) +{ + SerdCursor cursor = {model, + {pattern[0], pattern[1], pattern[2], pattern[3]}, + model->version, + iter, + strategy}; + + if (strategy.mode == FILTER_RANGE || strategy.mode == FILTER_EVERYTHING) { + serd_cursor_seek_match(&cursor); + } + +#ifndef NDEBUG + if (!zix_btree_iter_is_end(cursor.iter)) { + // Check that the cursor is at a matching statement + const SerdStatement* first = + (const SerdStatement*)zix_btree_get(cursor.iter); + assert(statement_matches_quad(first, pattern)); + + // Check that any nodes in the pattern are interned + for (unsigned i = 0U; i < 3; ++i) { + assert(!cursor.pattern[i] || cursor.pattern[i] == first->nodes[i]); + } + } +#endif + + return cursor; +} + +SerdCursor* +serd_cursor_copy(SerdAllocator* const allocator, const SerdCursor* const cursor) +{ + if (!cursor) { + return NULL; + } + + SerdCursor* const copy = + (SerdCursor* const)serd_amalloc(allocator, sizeof(SerdCursor)); + + if (copy) { + memcpy(copy, cursor, sizeof(SerdCursor)); + } + + return copy; +} + +const SerdStatement* +serd_cursor_get(const SerdCursor* const cursor) +{ + return ( + (cursor && !zix_btree_iter_is_end(cursor->iter) && check_version(cursor)) + ? (const SerdStatement*)zix_btree_get(cursor->iter) + : NULL); +} + +SerdStatus +serd_cursor_scan_next(SerdCursor* const cursor) +{ + if (zix_btree_iter_is_end(cursor->iter) || !check_version(cursor)) { + return SERD_BAD_CURSOR; + } + + switch (cursor->strategy.mode) { + case SCAN_EVERYTHING: + break; + + case SCAN_RANGE: + if (!serd_cursor_in_range(cursor)) { + // Went past the end of the matching range + cursor->iter = zix_btree_end_iter; + return SERD_FAILURE; + } + break; + + case FILTER_EVERYTHING: + case FILTER_RANGE: + // Seek forward to next match + return serd_cursor_seek_match(cursor); + } + + return SERD_SUCCESS; +} + +SerdStatus +serd_cursor_advance(SerdCursor* const cursor) +{ + if (!cursor) { + return SERD_FAILURE; + } + + if (zix_btree_iter_is_end(cursor->iter) || !check_version(cursor)) { + return SERD_BAD_CURSOR; + } + + const ZixStatus zst = zix_btree_iter_increment(&cursor->iter); + if (zst) { + assert(zst == ZIX_STATUS_REACHED_END); + return SERD_FAILURE; + } + + return serd_cursor_scan_next(cursor); +} + +bool +serd_cursor_is_end(const SerdCursor* const cursor) +{ + return !cursor || zix_btree_iter_is_end(cursor->iter); +} + +bool +serd_cursor_equals(const SerdCursor* const lhs, const SerdCursor* const rhs) +{ + if (serd_cursor_is_end(lhs) || serd_cursor_is_end(rhs)) { + return serd_cursor_is_end(lhs) && serd_cursor_is_end(rhs); + } + + /* We don't bother checking if the patterns match each other here, or if both + cursors have the same ordering, since both of these must be true if the + BTree iterators are equal. */ + + return (zix_btree_iter_equals(lhs->iter, rhs->iter) && + lhs->strategy.mode == rhs->strategy.mode && + lhs->strategy.n_prefix == rhs->strategy.n_prefix); +} + +void +serd_cursor_free(SerdAllocator* const allocator, SerdCursor* const cursor) +{ + serd_afree(allocator, cursor); +} diff --git a/src/cursor.h b/src/cursor.h new file mode 100644 index 00000000..d7de7471 --- /dev/null +++ b/src/cursor.h @@ -0,0 +1,71 @@ +// Copyright 2011-2020 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_SRC_CURSOR_H +#define SERD_SRC_CURSOR_H + +#include "serd/cursor.h" +#include "serd/model.h" +#include "serd/node.h" +#include "serd/status.h" +#include "zix/btree.h" + +#include <stdbool.h> +#include <stddef.h> + +#define N_STATEMENT_ORDERS 12u + +/// An iteration mode that determines what to skip and when to end +typedef enum { + SCAN_EVERYTHING, ///< Iterate over entire store + SCAN_RANGE, ///< Iterate over range with equal prefix + FILTER_EVERYTHING, ///< Iterate to end of store, filtering + FILTER_RANGE, ///< Iterate over range with equal prefix, filtering +} ScanMode; + +/// A strategy for searching and iterating over a statement index +typedef struct { + ScanMode mode; ///< Iteration mode + unsigned n_prefix; ///< Number of prefix nodes that match the index + SerdStatementOrder order; ///< Order of index to scan +} ScanStrategy; + +struct SerdCursorImpl { + const SerdModel* model; ///< Model being iterated over + const SerdNode* pattern[4]; ///< Search pattern (nodes in model or null) + size_t version; ///< Model version when iterator was created + ZixBTreeIter iter; ///< Current position in index + ScanStrategy strategy; ///< Index scanning strategy +}; + +/// Lookup table of ordered indices for each SerdStatementOrder +static const unsigned orderings[N_STATEMENT_ORDERS][4] = { + {0U, 1U, 2U, 3U}, // SPOG + {0U, 2U, 1U, 3U}, // SOPG + {2U, 1U, 0U, 3U}, // OPSG + {2U, 0U, 1U, 3U}, // OPSG + {1U, 0U, 2U, 3U}, // PSOG + {1U, 2U, 0U, 3U}, // PSOG + {3U, 0U, 1U, 2U}, // GSPO + {3U, 0U, 2U, 1U}, // GSPO + {3U, 2U, 1U, 0U}, // GOPS + {3U, 2U, 0U, 1U}, // GOPS + {3U, 1U, 0U, 2U}, // GPSO + {3U, 1U, 2U, 0U} // GPSO +}; + +SerdCursor +serd_cursor_make(const SerdModel* model, + ZixBTreeIter iter, + const SerdNode* const pattern[4], + ScanStrategy strategy); + +SerdStatus +serd_cursor_scan_next(SerdCursor* cursor); + +bool +serd_iter_in_range(ZixBTreeIter iter, + const SerdNode* const pattern[4], + ScanStrategy strategy); + +#endif // SERD_SRC_CURSOR_H diff --git a/src/describe.c b/src/describe.c new file mode 100644 index 00000000..a7012cf9 --- /dev/null +++ b/src/describe.c @@ -0,0 +1,314 @@ +// Copyright 2011-2023 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#include "cursor.h" +#include "model.h" +#include "world.h" + +// Define the types used in the hash interface for more type safety +#define ZIX_HASH_KEY_TYPE const SerdNode +#define ZIX_HASH_RECORD_TYPE const SerdNode + +#include "serd/cursor.h" +#include "serd/describe.h" +#include "serd/memory.h" +#include "serd/model.h" +#include "serd/node.h" +#include "serd/sink.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "serd/world.h" +#include "zix/allocator.h" +#include "zix/attributes.h" +#include "zix/digest.h" +#include "zix/hash.h" +#include "zix/status.h" + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> + +typedef enum { NAMED, ANON_S, ANON_O, LIST_S, LIST_O } NodeStyle; + +typedef struct { + SerdAllocator* allocator; // Allocator for auxiliary structures + const SerdModel* model; // Model to read from + const SerdSink* sink; // Sink to write description to + ZixHash* list_subjects; // Nodes written in the current list or null + SerdDescribeFlags flags; // Flags to control description +} DescribeContext; + +static SerdStatus +write_range_statement(const DescribeContext* ctx, + unsigned depth, + SerdStatementFlags statement_flags, + const SerdStatement* statement, + const SerdNode* last_subject, + bool write_types); + +static NodeStyle +get_node_style(const SerdModel* const model, const SerdNode* const node) +{ + if (serd_node_type(node) != SERD_BLANK) { + return NAMED; // Non-blank node can't be anonymous + } + + const size_t n_as_object = serd_model_count(model, NULL, NULL, node, NULL); + if (n_as_object > 1) { + return NAMED; // Blank node referred to several times + } + + if (serd_model_count(model, node, model->world->rdf_first, NULL, NULL) == 1 && + serd_model_count(model, node, model->world->rdf_rest, NULL, NULL) == 1 && + !serd_model_ask(model, NULL, model->world->rdf_rest, node, NULL)) { + return n_as_object == 0 ? LIST_S : LIST_O; + } + + return n_as_object == 0 ? ANON_S : ANON_O; +} + +static const SerdNode* +identity(const SerdNode* const record) +{ + return record; +} + +static ZixHashCode +ptr_hash(const SerdNode* const ptr) +{ + return zix_digest(0U, &ptr, sizeof(SerdNode*)); +} + +static bool +ptr_equals(const SerdNode* const a, const SerdNode* const b) +{ + return *(const void* const*)a == *(const void* const*)b; +} + +static SerdStatus +write_pretty_range(const DescribeContext* const ctx, + const unsigned depth, + SerdCursor* const range, + const SerdNode* last_subject, + bool write_types) +{ + SerdStatus st = SERD_SUCCESS; + const SerdStatement* statement = serd_cursor_get(range); + + while (statement) { + // Write this statement (and possibly more to describe anonymous nodes) + if ((st = write_range_statement( + ctx, depth, 0U, statement, last_subject, write_types))) { + break; + } + + // Update the last subject and advance the cursor + last_subject = serd_statement_subject(statement); + st = serd_cursor_advance(range); + statement = serd_cursor_get(range); + } + + return st > SERD_FAILURE ? st : SERD_SUCCESS; +} + +static SerdStatus +write_list(const DescribeContext* const ctx, + const unsigned depth, + SerdStatementFlags flags, + const SerdNode* node, + const SerdNode* const graph) +{ + const SerdModel* const model = ctx->model; + const SerdWorld* const world = model->world; + const SerdSink* const sink = ctx->sink; + const SerdNode* const rdf_first = world->rdf_first; + const SerdNode* const rdf_rest = world->rdf_rest; + const SerdNode* const rdf_nil = world->rdf_nil; + SerdStatus st = SERD_SUCCESS; + + const SerdStatement* fs = + serd_model_get_statement(model, node, rdf_first, NULL, graph); + + assert(fs); // Shouldn't get here if it doesn't at least have an rdf:first + + while (!st && !serd_node_equals(node, rdf_nil)) { + // Write rdf:first statement for this node + if ((st = write_range_statement(ctx, depth, flags, fs, NULL, false))) { + return st; + } + + // Get rdf:rest statement + const SerdStatement* const rs = + serd_model_get_statement(model, node, rdf_rest, NULL, graph); + + if (!rs) { + // Terminate malformed list with missing rdf:rest + return serd_sink_write(sink, 0, node, rdf_rest, rdf_nil, graph); + } + + // Terminate if the next node has no rdf:first + const SerdNode* const next = serd_statement_object(rs); + if (!(fs = serd_model_get_statement(model, next, rdf_first, NULL, graph))) { + return serd_sink_write(sink, 0, node, rdf_rest, rdf_nil, graph); + } + + // Write rdf:next statement and move to the next node + st = serd_sink_write_statement(sink, 0, rs); + node = next; + flags = 0U; + } + + return st; +} + +static bool +skip_range_statement(const SerdModel* const model, + const SerdStatement* const statement) +{ + const SerdNode* const subject = serd_statement_subject(statement); + const NodeStyle subject_style = get_node_style(model, subject); + const SerdNode* const predicate = serd_statement_predicate(statement); + + if (subject_style == ANON_O || subject_style == LIST_O) { + return true; // Skip subject that will be inlined elsewhere + } + + if (subject_style == LIST_S && + (serd_node_equals(predicate, model->world->rdf_first) || + serd_node_equals(predicate, model->world->rdf_rest))) { + return true; // Skip list statement that write_list will handle + } + + return false; +} + +static SerdStatus +write_subject_types(const DescribeContext* const ctx, + const unsigned depth, + const SerdNode* const subject, + const SerdNode* const graph) +{ + SerdCursor* const t = serd_model_find(ctx->allocator, + ctx->model, + subject, + ctx->model->world->rdf_type, + NULL, + graph); + + const SerdStatus st = + t ? write_pretty_range(ctx, depth + 1, t, subject, true) : SERD_SUCCESS; + + serd_cursor_free(ctx->allocator, t); + return st; +} + +static bool +types_first_for_subject(const DescribeContext* const ctx, const NodeStyle style) +{ + return style != LIST_S && !(ctx->flags & SERD_NO_TYPE_FIRST); +} + +static SerdStatus +write_range_statement(const DescribeContext* const ctx, + const unsigned depth, + SerdStatementFlags statement_flags, + const SerdStatement* ZIX_NONNULL statement, + const SerdNode* ZIX_NULLABLE last_subject, + const bool write_types) +{ + const SerdModel* const model = ctx->model; + const SerdSink* const sink = ctx->sink; + const SerdNode* const subject = serd_statement_subject(statement); + const NodeStyle subject_style = get_node_style(model, subject); + const SerdNode* const predicate = serd_statement_predicate(statement); + const SerdNode* const object = serd_statement_object(statement); + const NodeStyle object_style = get_node_style(model, object); + const SerdNode* const graph = serd_statement_graph(statement); + SerdStatus st = SERD_SUCCESS; + + if (depth == 0U) { + if (skip_range_statement(model, statement)) { + return SERD_SUCCESS; // Skip subject that will be inlined elsewhere + } + + if (subject_style == LIST_S) { + // First write inline list subject, which this statement will follow + if (zix_hash_insert(ctx->list_subjects, subject) != ZIX_STATUS_EXISTS) { + st = write_list(ctx, 2, statement_flags | SERD_LIST_S, subject, graph); + } + } + } + + if (st) { + return st; + } + + // If this is a new subject, write types first if necessary + const bool types_first = types_first_for_subject(ctx, subject_style); + if (subject != last_subject && types_first) { + st = write_subject_types(ctx, depth, subject, graph); + } + + // Skip type statement if it would be written another time (just above) + if (subject_style != LIST_S && !write_types && + serd_node_equals(predicate, model->world->rdf_type)) { + return st; + } + + // Set up the flags for this statement + statement_flags |= + (((subject_style == ANON_S) * (SerdStatementFlags)SERD_EMPTY_S) | + ((object_style == ANON_O) * (SerdStatementFlags)SERD_ANON_O) | + ((object_style == LIST_O) * (SerdStatementFlags)SERD_LIST_O)); + + // Finally write this statement + if ((st = serd_sink_write_statement(sink, statement_flags, statement))) { + return st; + } + + if (object_style == ANON_O) { + // Follow an anonymous object with its description like "[ ... ]" + SerdCursor* const iter = + serd_model_find(ctx->allocator, model, object, NULL, NULL, NULL); + + if (!(st = write_pretty_range(ctx, depth + 1, iter, last_subject, false))) { + st = serd_sink_write_end(sink, object); + } + + serd_cursor_free(ctx->allocator, iter); + + } else if (object_style == LIST_O) { + // Follow a list object with its description like "( ... )" + st = write_list(ctx, depth + 1, 0U, object, graph); + } + + return st; +} + +SerdStatus +serd_describe_range(SerdAllocator* const allocator, + const SerdCursor* const range, + const SerdSink* sink, + const SerdDescribeFlags flags) +{ + if (serd_cursor_is_end(range)) { + return SERD_SUCCESS; + } + + assert(sink); + + SerdCursor copy = *range; + + ZixHash* const list_subjects = + zix_hash_new((ZixAllocator*)allocator, identity, ptr_hash, ptr_equals); + + SerdStatus st = SERD_BAD_ALLOC; + if (list_subjects) { + DescribeContext ctx = {allocator, range->model, sink, list_subjects, flags}; + + st = write_pretty_range(&ctx, 0, ©, NULL, (flags & SERD_NO_TYPE_FIRST)); + } + + zix_hash_free(list_subjects); + return st; +} diff --git a/src/inserter.c b/src/inserter.c new file mode 100644 index 00000000..f3b8631b --- /dev/null +++ b/src/inserter.c @@ -0,0 +1,146 @@ +// Copyright 2011-2020 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#include "memory.h" +#include "model.h" +#include "statement.h" + +#include "serd/caret.h" +#include "serd/event.h" +#include "serd/inserter.h" +#include "serd/log.h" +#include "serd/model.h" +#include "serd/node.h" +#include "serd/nodes.h" +#include "serd/sink.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "serd/uri.h" +#include "serd/world.h" + +#include <assert.h> +#include <stdbool.h> +#include <stdlib.h> + +typedef struct { + SerdModel* model; + SerdNode* default_graph; +} SerdInserterData; + +static bool +can_insert(SerdWorld* const world, const SerdNode* const node) +{ + if (node) { + switch (serd_node_type(node)) { + case SERD_LITERAL: + return can_insert(world, serd_node_datatype(node)); + + case SERD_URI: + if (!serd_uri_string_has_scheme(serd_node_string(node))) { + serd_logf(world, + SERD_LOG_LEVEL_ERROR, + "attempt to insert relative URI <%s> into model", + serd_node_string(node)); + return false; + } + break; + + case SERD_BLANK: + case SERD_VARIABLE: + break; + } + } + + return true; +} + +static SerdStatus +serd_inserter_on_statement(SerdInserterData* const data, + const SerdStatementFlags flags, + const SerdStatement* const statement) +{ + (void)flags; + + SerdModel* const model = data->model; + SerdWorld* const world = model->world; + + // Check that every node is expanded so it is context-free + for (unsigned i = 0; i < 4; ++i) { + if (!can_insert(world, serd_statement_node(statement, (SerdField)i))) { + return SERD_BAD_DATA; + } + } + + const SerdNode* const s = + serd_nodes_intern(model->nodes, serd_statement_subject(statement)); + + const SerdNode* const p = + serd_nodes_intern(model->nodes, serd_statement_predicate(statement)); + + const SerdNode* const o = + serd_nodes_intern(model->nodes, serd_statement_object(statement)); + + const SerdNode* const g = serd_nodes_intern( + model->nodes, + serd_statement_graph(statement) ? serd_statement_graph(statement) + : data->default_graph); + + const SerdCaret* const caret = + (data->model->flags & SERD_STORE_CARETS) ? statement->caret : NULL; + + const SerdStatus st = + serd_model_add_with_caret(data->model, s, p, o, g, caret); + + return st > SERD_FAILURE ? st : SERD_SUCCESS; +} + +static SerdStatus +serd_inserter_on_event(void* const handle, const SerdEvent* const event) +{ + SerdInserterData* const data = (SerdInserterData*)handle; + + if (event->type == SERD_STATEMENT) { + return serd_inserter_on_statement( + data, event->statement.flags, event->statement.statement); + } + + return SERD_SUCCESS; +} + +static SerdInserterData* +serd_inserter_data_new(SerdModel* const model, + const SerdNode* const default_graph) +{ + SerdInserterData* const data = + (SerdInserterData*)serd_wcalloc(model->world, 1, sizeof(SerdInserterData)); + + if (data) { + data->model = model; + data->default_graph = serd_node_copy(model->allocator, default_graph); + } + + return data; +} + +static void +serd_inserter_data_free(void* const ptr) +{ + SerdInserterData* const data = (SerdInserterData*)ptr; + serd_node_free(data->model->allocator, data->default_graph); + serd_wfree(data->model->world, data); +} + +SerdSink* +serd_inserter_new(SerdModel* const model, const SerdNode* const default_graph) +{ + assert(model); + + SerdEventFunc func = serd_inserter_on_event; + SerdInserterData* const data = serd_inserter_data_new(model, default_graph); + + return data ? serd_sink_new(serd_world_allocator(model->world), + data, + func, + (SerdFreeFunc)serd_inserter_data_free) + : NULL; +} diff --git a/src/model.c b/src/model.c new file mode 100644 index 00000000..89b37d83 --- /dev/null +++ b/src/model.c @@ -0,0 +1,820 @@ +// Copyright 2011-2020 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#include "model.h" + +#include "caret.h" +#include "compare.h" +#include "cursor.h" +#include "memory.h" +#include "statement.h" + +#include "serd/caret.h" +#include "serd/log.h" +#include "serd/node.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "zix/allocator.h" +#include "zix/btree.h" +#include "zix/status.h" + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> + +static const SerdNode* const everything_pattern[4] = {NULL, NULL, NULL, NULL}; + +/// A 3-bit signature for a triple pattern used as a table index +typedef enum { + SERD_SIGNATURE_XXX, // 000 (???) + SERD_SIGNATURE_XXO, // 001 (??O) + SERD_SIGNATURE_XPX, // 010 (?P?) + SERD_SIGNATURE_XPO, // 011 (?PO) + SERD_SIGNATURE_SXX, // 100 (S??) + SERD_SIGNATURE_SXO, // 101 (S?O) + SERD_SIGNATURE_SPX, // 110 (SP?) + SERD_SIGNATURE_SPO, // 111 (SPO) +} SerdPatternSignature; + +static SerdPatternSignature +serd_model_pattern_signature(const bool with_s, + const bool with_p, + const bool with_o) +{ + return (SerdPatternSignature)(((with_s ? 1U : 0U) << 2U) + + ((with_p ? 1U : 0U) << 1U) + + ((with_o ? 1U : 0U))); +} + +static ZixBTreeCompareFunc +serd_model_index_comparator(const SerdModel* const model, + const SerdStatementOrder order) +{ + return (order < SERD_ORDER_GSPO && !(model->flags & SERD_STORE_GRAPHS)) + ? serd_triple_compare + : serd_quad_compare; +} + +static ZixBTreeCompareFunc +serd_model_pattern_comparator(const SerdModel* const model, + const SerdStatementOrder order) +{ + return (order < SERD_ORDER_GSPO && !(model->flags & SERD_STORE_GRAPHS)) + ? serd_triple_compare_pattern + : serd_quad_compare_pattern; +} + +SerdStatus +serd_model_add_index(SerdModel* const model, const SerdStatementOrder order) +{ + assert(model); + + if (model->indices[order]) { + return SERD_FAILURE; + } + + const unsigned* const ordering = orderings[order]; + const ZixBTreeCompareFunc comparator = + serd_model_index_comparator(model, order); + + model->indices[order] = + zix_btree_new((ZixAllocator*)model->allocator, comparator, ordering); + + if (!model->indices[order]) { + return SERD_BAD_ALLOC; + } + + // Insert statements from the default index + ZixStatus zst = ZIX_STATUS_SUCCESS; + if (order != model->default_order) { + ZixBTree* const default_index = model->indices[model->default_order]; + for (ZixBTreeIter i = zix_btree_begin(default_index); + !zst && !zix_btree_iter_is_end(i); + zix_btree_iter_increment(&i)) { + zst = zix_btree_insert(model->indices[order], zix_btree_get(i)); + } + } + + return zst ? serd_model_drop_index(model, order) : SERD_SUCCESS; +} + +SerdStatus +serd_model_drop_index(SerdModel* const model, const SerdStatementOrder order) +{ + assert(model); + + if (!model->indices[order]) { + return SERD_FAILURE; + } + + if (order == model->default_order) { + return SERD_BAD_CALL; + } + + zix_btree_free(model->indices[order], NULL, NULL); + model->indices[order] = NULL; + return SERD_SUCCESS; +} + +static SerdModel* +serd_model_new_with_allocator(SerdAllocator* const allocator, + SerdWorld* const world, + const SerdStatementOrder default_order, + const SerdModelFlags flags) +{ + assert(world); + + SerdNodes* const nodes = serd_nodes_new(allocator); + if (!nodes) { + return NULL; + } + + SerdModel* model = + (SerdModel*)serd_acalloc(allocator, 1, sizeof(struct SerdModelImpl)); + + if (!model) { + serd_nodes_free(nodes); + return NULL; + } + + model->allocator = allocator; + model->world = world; + model->nodes = nodes; + model->default_order = default_order; + model->flags = flags; + + if (serd_model_add_index(model, default_order)) { + serd_nodes_free(nodes); + serd_wfree(world, model); + return NULL; + } + + const ScanStrategy end_strategy = {SCAN_EVERYTHING, 0U, default_order}; + + model->end = serd_cursor_make(model, + zix_btree_end(model->indices[default_order]), + everything_pattern, + end_strategy); + + return model; +} + +SerdModel* +serd_model_new(SerdWorld* const world, + const SerdStatementOrder default_order, + const SerdModelFlags flags) +{ + return serd_model_new_with_allocator( + serd_world_allocator(world), world, default_order, flags); +} + +static void +log_bad_index(const SerdModel* const model, + const char* const description, + const SerdStatementOrder index_order, + const bool s, + const bool p, + const bool o, + const bool g) +{ + static const char* const order_strings[] = { + "S P O G", + "S O P G", + "O P S G", + "O S P G", + "P S O G", + "P O S G", + "G S P O", + "G S O P", + "G O P S", + "G O S P", + "G P S O", + "G P O S", + }; + + serd_logf(model->world, + SERD_LOG_LEVEL_WARNING, + "%s index (%s) for (%s %s %s%s) query", + description, + order_strings[index_order], + s ? "S" : "?", + p ? "P" : "?", + o ? "O" : "?", + (model->flags & SERD_STORE_GRAPHS) ? g ? " G" : " ?" : ""); +} + +static SerdCursor +make_begin_cursor(const SerdModel* const model, const SerdStatementOrder order) +{ + if (!model->indices[order]) { + log_bad_index(model, "missing", order, false, false, false, false); + return model->end; + } + + const ScanStrategy strategy = {SCAN_EVERYTHING, 0U, order}; + + return serd_cursor_make(model, + zix_btree_begin(model->indices[order]), + everything_pattern, + strategy); +} + +SerdModel* +serd_model_copy(SerdAllocator* const allocator, const SerdModel* const model) +{ + assert(model); + + SerdModel* copy = serd_model_new_with_allocator( + allocator, model->world, model->default_order, model->flags); + + SerdCursor cursor = make_begin_cursor(model, model->default_order); + serd_model_insert_statements(copy, &cursor); + + assert(serd_model_size(model) == serd_model_size(copy)); + assert(serd_nodes_size(model->nodes) == serd_nodes_size(copy->nodes)); + return copy; +} + +bool +serd_model_equals(const SerdModel* const a, const SerdModel* const b) +{ + if (!a && !b) { + return true; + } + + if (!a || !b || serd_model_size(a) != serd_model_size(b)) { + return false; + } + + SerdCursor ia = make_begin_cursor(a, a->default_order); + SerdCursor ib = make_begin_cursor(b, b->default_order); + + while (!serd_cursor_is_end(&ia) && !serd_cursor_is_end(&ib)) { + if (!serd_statement_equals(serd_cursor_get(&ia), serd_cursor_get(&ib)) || + serd_cursor_advance(&ia) > SERD_FAILURE || + serd_cursor_advance(&ib) > SERD_FAILURE) { + return false; + } + } + + return serd_cursor_is_end(&ia) && serd_cursor_is_end(&ib); +} + +static void +serd_model_drop_statement(SerdModel* const model, + SerdStatement* const statement) +{ + assert(statement); + + for (unsigned i = 0U; i < 4; ++i) { + if (statement->nodes[i]) { + serd_nodes_deref(model->nodes, statement->nodes[i]); + } + } + + if (statement->caret && serd_caret_document(statement->caret)) { + serd_nodes_deref(model->nodes, serd_caret_document(statement->caret)); + } + + serd_statement_free(model->allocator, statement); +} + +typedef struct { + SerdAllocator* allocator; +} DestroyContext; + +static void +destroy_tree_statement(void* ptr, const void* user_data) +{ + const DestroyContext* const ctx = (const DestroyContext*)user_data; + + serd_statement_free(ctx->allocator, (SerdStatement*)ptr); +} + +void +serd_model_free(SerdModel* const model) +{ + if (!model) { + return; + } + + // Free all statements (which are owned by the default index) + ZixBTree* const default_index = model->indices[model->default_order]; + const DestroyContext ctx = {model->allocator}; + zix_btree_clear(default_index, destroy_tree_statement, &ctx); + + // Free indices themselves + for (unsigned i = 0U; i < N_STATEMENT_ORDERS; ++i) { + zix_btree_free(model->indices[i], NULL, NULL); + } + + serd_nodes_free(model->nodes); + serd_wfree(model->world, model); +} + +SerdWorld* +serd_model_world(SerdModel* const model) +{ + assert(model); + return model->world; +} + +const SerdNodes* +serd_model_nodes(const SerdModel* const model) +{ + assert(model); + return model->nodes; +} + +SerdStatementOrder +serd_model_default_order(const SerdModel* const model) +{ + assert(model); + return model->default_order; +} + +SerdModelFlags +serd_model_flags(const SerdModel* const model) +{ + assert(model); + return model->flags; +} + +size_t +serd_model_size(const SerdModel* const model) +{ + assert(model); + return zix_btree_size(model->indices[model->default_order]); +} + +bool +serd_model_empty(const SerdModel* const model) +{ + assert(model); + return serd_model_size(model) == 0; +} + +SerdCursor* +serd_model_begin_ordered(SerdAllocator* const allocator, + const SerdModel* const model, + const SerdStatementOrder order) +{ + assert(model); + + const SerdCursor cursor = make_begin_cursor(model, order); + + return serd_cursor_copy(allocator, &cursor); +} + +SerdCursor* +serd_model_begin(SerdAllocator* const allocator, const SerdModel* const model) +{ + assert(model); + return serd_model_begin_ordered(allocator, model, model->default_order); +} + +const SerdCursor* +serd_model_end(const SerdModel* const model) +{ + assert(model); + return &model->end; +} + +static SerdStatementOrder +simple_order(const SerdStatementOrder order) +{ + return order < SERD_ORDER_GSPO ? order : (SerdStatementOrder)(order - 6U); +} + +/// Return the best index scanning strategy for a pattern with given nodes +static ScanStrategy +serd_model_strategy(const SerdModel* const model, + const bool with_s, + const bool with_p, + const bool with_o, + const bool with_g) +{ +#define N_STRATEGY_TIERS 4U + + const SerdStatementOrder default_order = simple_order(model->default_order); + const ScanStrategy none = {SCAN_EVERYTHING, 0U, default_order}; + + const ScanStrategy strategies[N_STRATEGY_TIERS][8] = { + // Preferred perfect strategies: SPO, SOP, OPS, PSO + { + none, + {SCAN_RANGE, 1U, SERD_ORDER_OPS}, // ??O + {SCAN_RANGE, 1U, SERD_ORDER_PSO}, // ?P? + {SCAN_RANGE, 2U, SERD_ORDER_OPS}, // ?PO + {SCAN_RANGE, 1U, SERD_ORDER_SPO}, // S?? + {SCAN_RANGE, 2U, SERD_ORDER_SOP}, // S?O + {SCAN_RANGE, 2U, SERD_ORDER_SPO}, // SP? + {SCAN_RANGE, 3U, default_order} // SPO + }, + + // Alternate perfect strategies: adds OSP and POS + { + none, // ??? + {SCAN_RANGE, 1U, SERD_ORDER_OSP}, // ??O + {SCAN_RANGE, 1U, SERD_ORDER_POS}, // ?P? + {SCAN_RANGE, 2U, SERD_ORDER_POS}, // ?PO + {SCAN_RANGE, 1U, SERD_ORDER_SOP}, // S?? + {SCAN_RANGE, 2U, SERD_ORDER_OSP}, // S?O + {SCAN_RANGE, 2U, SERD_ORDER_PSO}, // SP? + none // SPO + }, + + // Preferred partial prefix strategies + { + none, // ??? + none, // ??O + none, // ?P? + {FILTER_RANGE, 1U, SERD_ORDER_PSO}, // ?PO + none, // S?? + {FILTER_RANGE, 1U, SERD_ORDER_SPO}, // S?O + {FILTER_RANGE, 1U, SERD_ORDER_SOP}, // SP? + none // SPO + }, + + // Alternate partial prefix strategies + { + none, // ??? + none, // ??O + none, // ?P? + {FILTER_RANGE, 1U, SERD_ORDER_OSP}, // ?PO + none, // S?? + {FILTER_RANGE, 1U, SERD_ORDER_OPS}, // S?O + {FILTER_RANGE, 1U, SERD_ORDER_POS}, // SP? + none // SPO + }, + }; + + // Construct a 3-bit signature for this triple pattern + const SerdPatternSignature sig = + serd_model_pattern_signature(with_s, with_p, with_o); + if (!sig && !with_g) { + return none; + } + + // Try to find a strategy we can support, from most to least preferred + for (unsigned t = 0U; t < N_STRATEGY_TIERS; ++t) { + ScanStrategy strategy = strategies[t][sig]; + const SerdStatementOrder triple_order = strategy.order; + + assert(strategy.order < SERD_ORDER_GSPO); + + if (strategy.n_prefix > 0) { + if (with_g) { + const SerdStatementOrder quad_order = + (SerdStatementOrder)(triple_order + 6U); + + if (model->indices[quad_order]) { + strategy.order = quad_order; + ++strategy.n_prefix; + return strategy; + } + } + + if (model->indices[triple_order]) { + return strategy; + } + } + } + + // Indices don't help with the triple, try to at least stay in the graph + if (with_g) { + for (unsigned i = SERD_ORDER_GSPO; i <= SERD_ORDER_GPOS; ++i) { + if (model->indices[i]) { + const ScanMode mode = sig == 0x000 ? SCAN_RANGE : FILTER_RANGE; + const ScanStrategy strategy = {mode, 1U, (SerdStatementOrder)i}; + + return strategy; + } + } + } + + // All is lost, regress to linear search + const ScanStrategy linear = {FILTER_EVERYTHING, 0U, model->default_order}; + return linear; +} + +static SerdCursor +serd_model_search(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + // Build a pattern of interned nodes + const SerdNode* pattern[4] = {serd_nodes_existing(model->nodes, s), + serd_nodes_existing(model->nodes, p), + serd_nodes_existing(model->nodes, o), + serd_nodes_existing(model->nodes, g)}; + + // If some node isn't in the model at all, no need to search for statements + const int n_given = !!s + !!p + !!o + !!g; + if (n_given != (!!pattern[0] + !!pattern[1] + !!pattern[2] + !!pattern[3])) { + return model->end; + } + + // Determine the best available search strategy + const ScanStrategy strategy = serd_model_strategy(model, s, p, o, g); + const SerdStatementOrder order = strategy.order; + ZixBTree* const index = model->indices[order]; + + // Issue any performance warnings, and return early for degenerate cases + switch (strategy.mode) { + case SCAN_EVERYTHING: + return make_begin_cursor(model, order); + case SCAN_RANGE: + break; + case FILTER_EVERYTHING: + log_bad_index(model, "using effectively linear", order, s, p, o, g); + return serd_cursor_make(model, zix_btree_begin(index), pattern, strategy); + case FILTER_RANGE: + log_bad_index(model, "filtering partial", order, s, p, o, g); + break; + } + + // Find the first statement matching the pattern in the index + ZixBTreeIter iter = zix_btree_end_iter; + zix_btree_lower_bound(index, + serd_model_pattern_comparator(model, order), + orderings[order], + pattern, + &iter); + + return (!zix_btree_iter_is_end(iter) && + serd_iter_in_range(iter, pattern, strategy)) + ? serd_cursor_make(model, iter, pattern, strategy) + : model->end; +} + +SerdCursor* +serd_model_find(SerdAllocator* const allocator, + const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + assert(model); + + const SerdCursor cursor = serd_model_search(model, s, p, o, g); + + return zix_btree_iter_is_end(cursor.iter) + ? NULL + : serd_cursor_copy(allocator, &cursor); +} + +const SerdNode* +serd_model_get(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + assert(model); + + const SerdStatement* const statement = + serd_model_get_statement(model, s, p, o, g); + + return !statement ? NULL + : !s ? serd_statement_subject(statement) + : !p ? serd_statement_predicate(statement) + : !o ? serd_statement_object(statement) + : !g ? serd_statement_graph(statement) + : NULL; +} + +const SerdStatement* +serd_model_get_statement(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + assert(model); + + if ((bool)s + (bool)p + (bool)o != 2 && + (bool)s + (bool)p + (bool)o + (bool)g != 3) { + return NULL; + } + + const SerdCursor i = serd_model_search(model, s, p, o, g); + + return serd_cursor_get(&i); +} + +size_t +serd_model_count(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + assert(model); + + SerdCursor i = serd_model_search(model, s, p, o, g); + size_t count = 0; + + for (; !serd_cursor_is_end(&i); serd_cursor_advance(&i)) { + ++count; + } + + return count; +} + +bool +serd_model_ask(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + assert(model); + + const SerdCursor c = serd_model_search(model, s, p, o, g); + + return !serd_cursor_is_end(&c); +} + +static SerdCaret* +serd_model_intern_caret(SerdModel* const model, const SerdCaret* const caret) +{ + if (!caret) { + return NULL; + } + + SerdCaret* const copy = + (SerdCaret*)serd_acalloc(model->allocator, 1, sizeof(SerdCaret)); + + if (copy) { + copy->document = serd_nodes_intern(model->nodes, caret->document); + copy->line = caret->line; + copy->col = caret->col; + } + + return copy; +} + +SerdStatus +serd_model_add_with_caret(SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g, + const SerdCaret* const caret) +{ + assert(model); + + SerdStatement* const statement = + serd_statement_new(model->allocator, s, p, o, g, NULL); + + if (!statement) { + return SERD_BAD_ALLOC; + } + + statement->caret = serd_model_intern_caret(model, caret); + + bool added = false; + for (unsigned i = 0U; i < N_STATEMENT_ORDERS; ++i) { + if (model->indices[i]) { + if (!zix_btree_insert(model->indices[i], statement)) { + added = true; + } else if ((SerdStatementOrder)i == model->default_order) { + break; // Statement already indexed + } + } + } + + ++model->version; + if (added) { + return SERD_SUCCESS; + } + + serd_model_drop_statement(model, statement); + return SERD_FAILURE; +} + +SerdStatus +serd_model_add(SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + return serd_model_add_with_caret(model, + serd_nodes_intern(model->nodes, s), + serd_nodes_intern(model->nodes, p), + serd_nodes_intern(model->nodes, o), + serd_nodes_intern(model->nodes, g), + NULL); +} + +SerdStatus +serd_model_insert(SerdModel* const model, const SerdStatement* const statement) +{ + SerdNodes* const nodes = model->nodes; + + return serd_model_add_with_caret( + model, + serd_nodes_intern(nodes, serd_statement_subject(statement)), + serd_nodes_intern(nodes, serd_statement_predicate(statement)), + serd_nodes_intern(nodes, serd_statement_object(statement)), + serd_nodes_intern(nodes, serd_statement_graph(statement)), + serd_statement_caret(statement)); +} + +SerdStatus +serd_model_insert_statements(SerdModel* const model, SerdCursor* const range) +{ + const SerdStatement* statement = serd_cursor_get(range); + SerdStatus st = SERD_SUCCESS; + + while (!st && statement) { + if (!(st = serd_model_insert(model, statement))) { + st = serd_cursor_advance(range); + } + + statement = serd_cursor_get(range); + } + + return st; +} + +SerdStatus +serd_model_erase(SerdModel* const model, SerdCursor* const cursor) +{ + assert(model); + assert(cursor); + + const SerdStatement* statement = serd_cursor_get(cursor); + SerdStatement* removed = NULL; + ZixStatus zst = ZIX_STATUS_SUCCESS; + + if (!statement) { + return SERD_FAILURE; + } + + // Erase from the index associated with this cursor + zst = zix_btree_remove(model->indices[cursor->strategy.order], + statement, + (void**)&removed, + &cursor->iter); + + // No error should be possible with a valid cursor + assert(!zst); + assert(removed); + assert(removed == statement); + + for (unsigned i = SERD_ORDER_SPO; i <= SERD_ORDER_GPOS; ++i) { + ZixBTree* index = model->indices[i]; + + if (index && i != (unsigned)cursor->strategy.order) { + SerdStatement* ignored = NULL; + ZixBTreeIter next = zix_btree_end_iter; + zst = zix_btree_remove(index, statement, (void**)&ignored, &next); + + assert(zst == ZIX_STATUS_SUCCESS || zst == ZIX_STATUS_NOT_FOUND); + assert(!ignored || ignored == removed); + } + } + serd_cursor_scan_next(cursor); + + serd_model_drop_statement(model, removed); + cursor->version = ++model->version; + + (void)zst; + return SERD_SUCCESS; +} + +SerdStatus +serd_model_erase_statements(SerdModel* const model, SerdCursor* const range) +{ + assert(model); + assert(range); + + SerdStatus st = SERD_SUCCESS; + + while (!st && !serd_cursor_is_end(range)) { + st = serd_model_erase(model, range); + } + + return st; +} + +SerdStatus +serd_model_clear(SerdModel* const model) +{ + assert(model); + + SerdStatus st = SERD_SUCCESS; + SerdCursor i = make_begin_cursor(model, model->default_order); + + while (!st && !serd_cursor_is_end(&i)) { + st = serd_model_erase(model, &i); + } + + return st; +} diff --git a/src/model.h b/src/model.h new file mode 100644 index 00000000..a67ba087 --- /dev/null +++ b/src/model.h @@ -0,0 +1,29 @@ +// Copyright 2011-2020 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_SRC_MODEL_H +#define SERD_SRC_MODEL_H + +#include "cursor.h" + +#include "serd/cursor.h" +#include "serd/memory.h" +#include "serd/model.h" +#include "serd/nodes.h" +#include "serd/world.h" +#include "zix/btree.h" + +#include <stddef.h> + +struct SerdModelImpl { + SerdAllocator* allocator; ///< Allocator for everything in this model + SerdWorld* world; ///< World this model is a part of + SerdNodes* nodes; ///< Interned nodes in this model + ZixBTree* indices[12]; ///< Trees of SerdStatement pointers + SerdCursor end; ///< End cursor (always the same) + size_t version; ///< Version incremented on every change + SerdStatementOrder default_order; ///< Order for main statement-owning index + SerdModelFlags flags; ///< Active indices and features +}; + +#endif // SERD_SRC_MODEL_H diff --git a/src/reader.c b/src/reader.c index b7a64ce4..5ce81962 100644 --- a/src/reader.c +++ b/src/reader.c @@ -89,7 +89,7 @@ tolerate_status(const SerdReader* const reader, const SerdStatus status) if (status == SERD_BAD_STREAM || status == SERD_BAD_STACK || status == SERD_BAD_WRITE || status == SERD_NO_DATA || - status == SERD_BAD_CALL) { + status == SERD_BAD_CALL || status == SERD_BAD_CURSOR) { return false; } diff --git a/src/string.c b/src/string.c index b8227100..ac964705 100644 --- a/src/string.c +++ b/src/string.c @@ -64,6 +64,10 @@ serd_strerror(const SerdStatus status) return "Invalid literal"; case SERD_BAD_PATTERN: return "Invalid statement pattern"; + case SERD_BAD_CURSOR: + return "Invalid cursor"; + case SERD_BAD_INDEX: + return "No suitable model index available"; } return "Unknown error"; diff --git a/test/meson.build b/test/meson.build index 6ca0e38b..8e0688f6 100644 --- a/test/meson.build +++ b/test/meson.build @@ -23,6 +23,7 @@ simple_script_paths = [ 'test_multifile.py', 'test_patterns.py', 'test_quiet.py', + 'test_sort.py', 'test_stdin.py', 'test_write_error.py', ] @@ -129,10 +130,12 @@ subdir('headers') unit_tests = [ 'canon', 'caret', + 'cursor', 'env', 'filter', 'free_null', 'log', + 'model', 'node', 'node_syntax', 'nodes', @@ -227,6 +230,18 @@ simple_command_tests = { ['-k', '512', '-s', '<go:>a<go:> .'], ], }, + 'sort': { + 'bad': [ + ['-c', 'CHAOS'], + ['-c'], + ['-z'], + ['/no/such/file'], + ], + 'good': [ + ['-V'], + ['-h'], + ], + }, } foreach tool, tests : simple_command_tests @@ -441,7 +456,7 @@ if is_variable('serd_filter') # Command line options test( - 'garbage_pattern', + 'bad_pattern', tool, args: ['junk', serd_ttl], env: test_env, @@ -489,6 +504,34 @@ if is_variable('serd_filter') subdir('extra/filter') endif +# Tests specific to serd-sort +if is_variable('serd_sort') + sort_script_args = common_script_args + ['--tool', serd_sort] + + test( + 'stdin', + files('test_stdin.py'), + args: sort_script_args, + env: test_env, + suite: ['tools', 'sort', 'input'], + ) + test( + 'missing_output', + serd_sort, + args: ['-o', '/does/not/exist.ttl', serd_ttl], + env: test_env, + should_fail: true, + suite: ['tools', 'sort', 'output'], + ) + test( + 'sort', + files('test_sort.py'), + args: sort_script_args + files('sort/input.trig'), + env: test_env, + suite: ['tools', 'sort'], + ) +endif + ########################### # Data-Driven Test Suites # ########################### @@ -651,7 +694,36 @@ if is_variable('serd_pipe') run_suite, args: pipe_script_args + args, env: test_env, - suite: ['suite'], + suite: ['suite', 'pipe'], + timeout: 240, + ) + endforeach +endif + +# Run model test suites using serd-sort +if is_variable('serd_sort') + sort_suite_names = [ + 'nquads', + 'ntriples', + 'trig', + 'turtle', + + 'good', + 'pattern', + 'perfect_forward', + 'perfect_reverse', + 'qualify', + 'terse', + ] + + foreach name : sort_suite_names + args = test_suites[name] + test( + name, + run_suite, + args: sort_script_args + ['--unique'] + args, + env: test_env, + suite: ['suite', 'sort'], timeout: 240, ) endforeach diff --git a/test/run_suite.py b/test/run_suite.py index 463e40c4..aa95b90f 100755 --- a/test/run_suite.py +++ b/test/run_suite.py @@ -37,7 +37,7 @@ TEST_TYPES = [ ] -def run_eval_test(command, in_path, good_path, out_path): +def run_eval_test(command, in_path, good_path, out_path, getlines): """Run a positive eval test and return whether the output matches.""" command = command + ["-o", out_path, in_path] @@ -45,7 +45,9 @@ def run_eval_test(command, in_path, good_path, out_path): with open(good_path, "r", encoding="utf-8") as good: with open(out_path, "r", encoding="utf-8") as out: - return util.lines_equal(list(good), list(out), good_path, out_path) + return util.lines_equal( + getlines(good), getlines(out), good_path, out_path + ) def run_positive_test(command, in_path): @@ -93,8 +95,13 @@ def run_entry(args, entry, command, out_dir, suite_dir): if args.reverse: in_path, good_path = good_path, in_path - out_path = os.path.join(out_dir, os.path.basename(good_path)) - return run_eval_test(command, in_path, good_path, out_path) + return run_eval_test( + command, + in_path, + good_path, + os.path.join(out_dir, os.path.basename(good_path)), + lambda f: sorted(set(f)) if args.unique else list(f), + ) def run_suite(args, command, out_dir): @@ -154,6 +161,7 @@ def main(): parser.add_argument("--report", help="path to write result report to") parser.add_argument("--reverse", action="store_true", help="reverse test") parser.add_argument("--tool", default="tools/serd-pipe", help="executable") + parser.add_argument("--unique", action="store_true", help="sort lines") parser.add_argument("--wrapper", default="", help="executable wrapper") parser.add_argument("manifest", help="test suite manifest.ttl file") parser.add_argument("base_uri", help="base URI for tests") diff --git a/test/sort/GOPS.nq b/test/sort/GOPS.nq new file mode 100644 index 00000000..3d033b6e --- /dev/null +++ b/test/sort/GOPS.nq @@ -0,0 +1,13 @@ +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Anonymous> <http://example.org/graph1> . +<http://example.org/s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Subject> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +<http://example.org/a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherSubject> <http://example.org/graph2> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . diff --git a/test/sort/GOSP.nq b/test/sort/GOSP.nq new file mode 100644 index 00000000..3d033b6e --- /dev/null +++ b/test/sort/GOSP.nq @@ -0,0 +1,13 @@ +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Anonymous> <http://example.org/graph1> . +<http://example.org/s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Subject> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +<http://example.org/a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherSubject> <http://example.org/graph2> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . diff --git a/test/sort/GPSO.nq b/test/sort/GPSO.nq new file mode 100644 index 00000000..67e13aa7 --- /dev/null +++ b/test/sort/GPSO.nq @@ -0,0 +1,13 @@ +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Subject> <http://example.org/graph1> . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Anonymous> <http://example.org/graph1> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +<http://example.org/a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherSubject> <http://example.org/graph2> . diff --git a/test/sort/GSOP.nq b/test/sort/GSOP.nq new file mode 100644 index 00000000..dbfcb7c1 --- /dev/null +++ b/test/sort/GSOP.nq @@ -0,0 +1,13 @@ +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +<http://example.org/s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Subject> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Anonymous> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherSubject> <http://example.org/graph2> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . diff --git a/test/sort/GSPO.nq b/test/sort/GSPO.nq new file mode 100644 index 00000000..5aab6e1e --- /dev/null +++ b/test/sort/GSPO.nq @@ -0,0 +1,13 @@ +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +<http://example.org/s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Subject> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Anonymous> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +<http://example.org/a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherSubject> <http://example.org/graph2> . diff --git a/test/sort/OPS.nq b/test/sort/OPS.nq new file mode 100644 index 00000000..593760be --- /dev/null +++ b/test/sort/OPS.nq @@ -0,0 +1,13 @@ +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Anonymous> <http://example.org/graph1> . +<http://example.org/a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherSubject> <http://example.org/graph2> . +<http://example.org/s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Subject> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . diff --git a/test/sort/OSP.nq b/test/sort/OSP.nq new file mode 100644 index 00000000..593760be --- /dev/null +++ b/test/sort/OSP.nq @@ -0,0 +1,13 @@ +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Anonymous> <http://example.org/graph1> . +<http://example.org/a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherSubject> <http://example.org/graph2> . +<http://example.org/s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Subject> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . diff --git a/test/sort/POS.nq b/test/sort/POS.nq new file mode 100644 index 00000000..20eabfe1 --- /dev/null +++ b/test/sort/POS.nq @@ -0,0 +1,13 @@ +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Anonymous> <http://example.org/graph1> . +<http://example.org/a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherSubject> <http://example.org/graph2> . +<http://example.org/s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Subject> <http://example.org/graph1> . diff --git a/test/sort/PSO.nq b/test/sort/PSO.nq new file mode 100644 index 00000000..113dd549 --- /dev/null +++ b/test/sort/PSO.nq @@ -0,0 +1,13 @@ +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherSubject> <http://example.org/graph2> . +<http://example.org/s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Subject> <http://example.org/graph1> . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Anonymous> <http://example.org/graph1> . diff --git a/test/sort/SOP.nq b/test/sort/SOP.nq new file mode 100644 index 00000000..7867eb24 --- /dev/null +++ b/test/sort/SOP.nq @@ -0,0 +1,13 @@ +<http://example.org/a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherSubject> <http://example.org/graph2> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +<http://example.org/s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Subject> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Anonymous> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . diff --git a/test/sort/SPO.nq b/test/sort/SPO.nq new file mode 100644 index 00000000..2b09a976 --- /dev/null +++ b/test/sort/SPO.nq @@ -0,0 +1,13 @@ +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +<http://example.org/a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherSubject> <http://example.org/graph2> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +<http://example.org/s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Subject> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Anonymous> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . diff --git a/test/sort/input.trig b/test/sort/input.trig new file mode 100644 index 00000000..9561d9b8 --- /dev/null +++ b/test/sort/input.trig @@ -0,0 +1,22 @@ +@prefix eg: <http://example.org/> . + +eg:graph1 { + eg:s + a eg:Subject ; + eg:blank [ + a eg:Anonymous ; + eg:with eg:aProperty , + eg:orAnother + ] ; + eg:list ( + 1 + 2 + ) ; + eg:literal "s1" . +} + +eg:graph2 { + eg:a + a eg:OtherSubject ; + eg:b eg:c . +} diff --git a/test/sort/pretty.nq b/test/sort/pretty.nq new file mode 100644 index 00000000..97851b82 --- /dev/null +++ b/test/sort/pretty.nq @@ -0,0 +1,13 @@ +<http://example.org/s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Subject> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Anonymous> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +<http://example.org/a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherSubject> <http://example.org/graph2> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . diff --git a/test/sort/untyped.nq b/test/sort/untyped.nq new file mode 100644 index 00000000..74625d7d --- /dev/null +++ b/test/sort/untyped.nq @@ -0,0 +1,12 @@ +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +<http://example.org/s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Subject> <http://example.org/graph1> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +<http://example.org/a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherSubject> <http://example.org/graph2> . diff --git a/test/test_cursor.c b/test/test_cursor.c new file mode 100644 index 00000000..fbc78271 --- /dev/null +++ b/test/test_cursor.c @@ -0,0 +1,114 @@ +// Copyright 2011-2021 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#undef NDEBUG + +#include "serd/cursor.h" +#include "serd/model.h" +#include "serd/node.h" +#include "serd/nodes.h" +#include "serd/status.h" +#include "serd/world.h" + +#include <assert.h> +#include <stddef.h> + +static void +test_copy(void) +{ + assert(!serd_cursor_copy(NULL, NULL)); + + SerdWorld* const world = serd_world_new(NULL); + SerdModel* const model = serd_model_new(world, SERD_ORDER_SPO, 0U); + SerdCursor* const begin = serd_model_begin(NULL, model); + SerdCursor* const copy = serd_cursor_copy(NULL, begin); + + assert(serd_cursor_equals(copy, begin)); + + serd_cursor_free(NULL, copy); + serd_cursor_free(NULL, begin); + serd_model_free(model); + serd_world_free(world); +} + +static void +test_comparison(void) +{ + SerdWorld* const world = serd_world_new(NULL); + SerdNodes* const nodes = serd_world_nodes(world); + SerdModel* const model = serd_model_new(world, SERD_ORDER_SPO, 0U); + + serd_model_add_index(model, SERD_ORDER_OPS); + + const SerdNode* const a = + serd_nodes_get(nodes, serd_a_uri_string("http://example.org/a")); + + const SerdNode* const b = + serd_nodes_get(nodes, serd_a_uri_string("http://example.org/b")); + + const SerdNode* const c = + serd_nodes_get(nodes, serd_a_uri_string("http://example.org/c")); + + // Add a single statement + assert(!serd_model_add(model, a, b, c, NULL)); + + // Make cursors that point to the statement but via different patterns + SerdCursor* const c1 = serd_model_find(NULL, model, a, NULL, NULL, NULL); + SerdCursor* const c2 = serd_model_find(NULL, model, a, b, NULL, NULL); + SerdCursor* const c3 = serd_model_find(NULL, model, NULL, b, c, NULL); + + // Ensure that they refer to the same statement but are not equal + assert(c1); + assert(c2); + assert(c3); + assert(serd_cursor_get(c1) == serd_cursor_get(c2)); + assert(serd_cursor_get(c2) == serd_cursor_get(c3)); + assert(!serd_cursor_equals(c1, c2)); + assert(!serd_cursor_equals(c2, c3)); + assert(!serd_cursor_equals(c1, c3)); + + // Check that none are equal to begin (which has a different mode) or end + SerdCursor* const begin = serd_model_begin(NULL, model); + assert(!serd_cursor_equals(c1, begin)); + assert(!serd_cursor_equals(c2, begin)); + assert(!serd_cursor_equals(c3, begin)); + assert(!serd_cursor_equals(c1, serd_model_end(model))); + assert(!serd_cursor_equals(c2, serd_model_end(model))); + assert(!serd_cursor_equals(c3, serd_model_end(model))); + serd_cursor_free(NULL, begin); + + // Check that a cursor that points to it via the same pattern is equal + SerdCursor* const c4 = serd_model_find(NULL, model, a, b, NULL, NULL); + assert(c4); + assert(serd_cursor_get(c4) == serd_cursor_get(c1)); + assert(serd_cursor_equals(c4, c2)); + assert(!serd_cursor_equals(c4, c3)); + serd_cursor_free(NULL, c4); + + // Advance everything to the end + assert(serd_cursor_advance(c1) == SERD_FAILURE); + assert(serd_cursor_advance(c2) == SERD_FAILURE); + assert(serd_cursor_advance(c3) == SERD_FAILURE); + + // Check that they are now equal, and equal to the model's end + assert(serd_cursor_equals(c1, c2)); + assert(serd_cursor_equals(c1, serd_model_end(model))); + assert(serd_cursor_equals(c2, serd_model_end(model))); + + serd_cursor_free(NULL, c3); + serd_cursor_free(NULL, c2); + serd_cursor_free(NULL, c1); + serd_model_free(model); + serd_world_free(world); +} + +int +main(void) +{ + assert(serd_cursor_advance(NULL) == SERD_FAILURE); + + test_copy(); + test_comparison(); + + return 0; +} diff --git a/test/test_free_null.c b/test/test_free_null.c index 7b186fba..89846c7f 100644 --- a/test/test_free_null.c +++ b/test/test_free_null.c @@ -4,12 +4,15 @@ #undef NDEBUG #include "serd/caret.h" +#include "serd/cursor.h" #include "serd/env.h" #include "serd/memory.h" +#include "serd/model.h" #include "serd/node.h" #include "serd/nodes.h" #include "serd/reader.h" #include "serd/sink.h" +#include "serd/statement.h" #include "serd/world.h" #include "serd/writer.h" @@ -27,6 +30,9 @@ main(void) serd_writer_free(NULL); serd_nodes_free(NULL); serd_caret_free(NULL, NULL); + serd_model_free(NULL); + serd_statement_free(NULL, NULL); + serd_cursor_free(NULL, NULL); return 0; } diff --git a/test/test_model.c b/test/test_model.c new file mode 100644 index 00000000..feb35440 --- /dev/null +++ b/test/test_model.c @@ -0,0 +1,1494 @@ +// Copyright 2011-2021 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#undef NDEBUG + +#include "failing_allocator.h" + +#include "serd/buffer.h" +#include "serd/caret.h" +#include "serd/cursor.h" +#include "serd/describe.h" +#include "serd/env.h" +#include "serd/inserter.h" +#include "serd/log.h" +#include "serd/memory.h" +#include "serd/model.h" +#include "serd/node.h" +#include "serd/nodes.h" +#include "serd/output_stream.h" +#include "serd/sink.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "serd/string_view.h" +#include "serd/syntax.h" +#include "serd/world.h" +#include "serd/writer.h" +#include "zix/attributes.h" + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define WILDCARD_NODE NULL + +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define RDF_FIRST NS_RDF "first" +#define RDF_REST NS_RDF "rest" +#define RDF_NIL NS_RDF "nil" + +#define N_OBJECTS_PER 2U + +typedef const SerdNode* Quad[4]; + +typedef struct { + Quad query; + int expected_num_results; +} QueryTest; + +static const SerdNode* +uri(SerdWorld* world, const unsigned num) +{ + char str[] = "http://example.org/0000000000"; + + snprintf(str + 19, 11, "%010u", num); + + return serd_nodes_get(serd_world_nodes(world), serd_a_uri_string(str)); +} + +static int +generate(SerdWorld* world, + SerdModel* model, + size_t n_quads, + const SerdNode* graph) +{ + SerdNodes* nodes = serd_world_nodes(world); + SerdStatus st = SERD_SUCCESS; + + for (unsigned i = 0; i < n_quads; ++i) { + unsigned num = (i * N_OBJECTS_PER) + 1U; + + const SerdNode* ids[2 + N_OBJECTS_PER]; + for (unsigned j = 0; j < 2 + N_OBJECTS_PER; ++j) { + ids[j] = uri(world, num++); + } + + for (unsigned j = 0; j < N_OBJECTS_PER; ++j) { + st = serd_model_add(model, ids[0], ids[1], ids[2 + j], graph); + assert(!st); + } + } + + // Add some literals + + // (98 4 "hello") and (98 4 "hello"^^<5>) + const SerdNode* hello = serd_nodes_get(nodes, serd_a_string("hello")); + + const SerdNode* hello_gb = serd_nodes_get( + nodes, serd_a_plain_literal(serd_string("hello"), serd_string("en-gb"))); + + const SerdNode* hello_us = serd_nodes_get( + nodes, serd_a_plain_literal(serd_string("hello"), serd_string("en-us"))); + + const SerdNode* hello_t4 = + serd_nodes_get(nodes, + serd_a_typed_literal(serd_string("hello"), + serd_node_string_view(uri(world, 4)))); + + const SerdNode* hello_t5 = + serd_nodes_get(nodes, + serd_a_typed_literal(serd_string("hello"), + serd_node_string_view(uri(world, 5)))); + + assert(!serd_model_add(model, uri(world, 98), uri(world, 4), hello, graph)); + assert( + !serd_model_add(model, uri(world, 98), uri(world, 4), hello_t5, graph)); + + // (96 4 "hello"^^<4>) and (96 4 "hello"^^<5>) + assert( + !serd_model_add(model, uri(world, 96), uri(world, 4), hello_t4, graph)); + assert( + !serd_model_add(model, uri(world, 96), uri(world, 4), hello_t5, graph)); + + // (94 5 "hello") and (94 5 "hello"@en-gb) + assert(!serd_model_add(model, uri(world, 94), uri(world, 5), hello, graph)); + assert( + !serd_model_add(model, uri(world, 94), uri(world, 5), hello_gb, graph)); + + // (92 6 "hello"@en-us) and (92 6 "hello"@en-gb) + assert( + !serd_model_add(model, uri(world, 92), uri(world, 6), hello_us, graph)); + assert( + !serd_model_add(model, uri(world, 92), uri(world, 6), hello_gb, graph)); + + // (14 6 "bonjour"@fr) and (14 6 "salut"@fr) + + const SerdNode* const bonjour = serd_nodes_get( + nodes, serd_a_plain_literal(serd_string("bonjour"), serd_string("fr"))); + + const SerdNode* const salut = serd_nodes_get( + nodes, serd_a_plain_literal(serd_string("salut"), serd_string("fr"))); + + assert(!serd_model_add(model, uri(world, 14), uri(world, 6), bonjour, graph)); + assert(!serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)); + + // Attempt to add duplicates + assert(serd_model_add(model, uri(world, 14), uri(world, 6), salut, graph)); + + // Add a blank node subject + const SerdNode* ablank = + serd_nodes_get(nodes, serd_a_blank(serd_string("ablank"))); + + assert(!serd_model_add(model, ablank, uri(world, 6), salut, graph)); + + // Add statement with URI object + assert(!serd_model_add(model, ablank, uri(world, 6), uri(world, 7), graph)); + + return EXIT_SUCCESS; +} + +static int +test_read(SerdWorld* world, + SerdModel* model, + const SerdNode* g, + const unsigned n_quads) +{ + SerdAllocator* const allocator = serd_default_allocator(); + SerdNodes* const nodes = serd_nodes_new(allocator); + + SerdCursor* cursor = serd_model_begin(NULL, model); + const SerdStatement* prev = NULL; + for (; !serd_cursor_equals(cursor, serd_model_end(model)); + serd_cursor_advance(cursor)) { + const SerdStatement* statement = serd_cursor_get(cursor); + assert(statement); + assert(serd_statement_subject(statement)); + assert(serd_statement_predicate(statement)); + assert(serd_statement_object(statement)); + assert(!serd_statement_equals(statement, prev)); + assert(!serd_statement_equals(prev, statement)); + prev = statement; + } + + // Attempt to increment past end + assert(serd_cursor_advance(cursor) == SERD_BAD_CURSOR); + serd_cursor_free(NULL, cursor); + + const SerdStringView s = serd_string("hello"); + + const SerdNode* plain_hello = serd_nodes_get(nodes, serd_a_string_view(s)); + + const SerdNode* type4_hello = serd_nodes_get( + nodes, serd_a_typed_literal(s, serd_node_string_view(uri(world, 4)))); + + const SerdNode* type5_hello = serd_nodes_get( + nodes, serd_a_typed_literal(s, serd_node_string_view(uri(world, 5)))); + + const SerdNode* gb_hello = + serd_nodes_get(nodes, serd_a_plain_literal(s, serd_string("en-gb"))); + + const SerdNode* us_hello = + serd_nodes_get(nodes, serd_a_plain_literal(s, serd_string("en-us"))); + +#define NUM_PATTERNS 18 + + QueryTest patterns[NUM_PATTERNS] = { + {{NULL, NULL, NULL}, (int)(n_quads * N_OBJECTS_PER) + 12}, + {{uri(world, 1), WILDCARD_NODE, WILDCARD_NODE}, 2}, + {{uri(world, 9), uri(world, 9), uri(world, 9)}, 0}, + {{uri(world, 1), uri(world, 2), uri(world, 4)}, 1}, + {{uri(world, 3), uri(world, 4), WILDCARD_NODE}, 2}, + {{WILDCARD_NODE, uri(world, 2), uri(world, 4)}, 1}, + {{WILDCARD_NODE, WILDCARD_NODE, uri(world, 4)}, 1}, + {{uri(world, 1), WILDCARD_NODE, WILDCARD_NODE}, 2}, + {{uri(world, 1), WILDCARD_NODE, uri(world, 4)}, 1}, + {{WILDCARD_NODE, uri(world, 2), WILDCARD_NODE}, 2}, + {{uri(world, 98), uri(world, 4), plain_hello}, 1}, + {{uri(world, 98), uri(world, 4), type5_hello}, 1}, + {{uri(world, 96), uri(world, 4), type4_hello}, 1}, + {{uri(world, 96), uri(world, 4), type5_hello}, 1}, + {{uri(world, 94), uri(world, 5), plain_hello}, 1}, + {{uri(world, 94), uri(world, 5), gb_hello}, 1}, + {{uri(world, 92), uri(world, 6), gb_hello}, 1}, + {{uri(world, 92), uri(world, 6), us_hello}, 1}}; + + Quad match = {uri(world, 1), uri(world, 2), uri(world, 4), g}; + assert(serd_model_ask(model, match[0], match[1], match[2], match[3])); + + Quad nomatch = {uri(world, 1), uri(world, 2), uri(world, 9), g}; + assert( + !serd_model_ask(model, nomatch[0], nomatch[1], nomatch[2], nomatch[3])); + + assert(!serd_model_get(model, NULL, NULL, uri(world, 3), g)); + assert(!serd_model_get(model, uri(world, 1), uri(world, 99), NULL, g)); + + assert(serd_node_equals( + serd_model_get(model, uri(world, 1), uri(world, 2), NULL, g), + uri(world, 3))); + assert(serd_node_equals( + serd_model_get(model, uri(world, 1), NULL, uri(world, 3), g), + uri(world, 2))); + assert(serd_node_equals( + serd_model_get(model, NULL, uri(world, 2), uri(world, 3), g), + uri(world, 1))); + if (g) { + assert(serd_node_equals( + serd_model_get(model, uri(world, 1), uri(world, 2), uri(world, 3), NULL), + g)); + } + + for (unsigned i = 0; i < NUM_PATTERNS; ++i) { + QueryTest test = patterns[i]; + Quad pat = {test.query[0], test.query[1], test.query[2], g}; + + SerdCursor* range = + serd_model_find(NULL, model, pat[0], pat[1], pat[2], pat[3]); + int num_results = 0; + for (; !serd_cursor_is_end(range); serd_cursor_advance(range)) { + ++num_results; + + const SerdStatement* first = serd_cursor_get(range); + assert(first); + assert(serd_statement_matches(first, pat[0], pat[1], pat[2], pat[3])); + } + + serd_cursor_free(NULL, range); + + assert(num_results == test.expected_num_results); + } + +#undef NUM_PATTERNS + + // Query blank node subject + + const SerdNode* ablank = + serd_nodes_get(nodes, serd_a_blank(serd_string("ablank"))); + + Quad pat = {ablank, 0, 0}; + int num_results = 0; + SerdCursor* range = + serd_model_find(NULL, model, pat[0], pat[1], pat[2], pat[3]); + + for (; !serd_cursor_is_end(range); serd_cursor_advance(range)) { + ++num_results; + const SerdStatement* statement = serd_cursor_get(range); + assert(serd_statement_matches(statement, pat[0], pat[1], pat[2], pat[3])); + } + serd_cursor_free(NULL, range); + + assert(num_results == 2); + + // Test nested queries + const SerdNode* last_subject = 0; + range = serd_model_find(NULL, model, NULL, NULL, NULL, NULL); + for (; !serd_cursor_is_end(range); serd_cursor_advance(range)) { + const SerdStatement* statement = serd_cursor_get(range); + const SerdNode* subject = serd_statement_subject(statement); + if (subject == last_subject) { + continue; + } + + Quad subpat = {subject, 0, 0}; + SerdCursor* const subrange = + serd_model_find(NULL, model, subpat[0], subpat[1], subpat[2], subpat[3]); + + assert(subrange); + + const SerdStatement* substatement = serd_cursor_get(subrange); + uint64_t num_sub_results = 0; + assert(serd_statement_subject(substatement) == subject); + for (; !serd_cursor_is_end(subrange); serd_cursor_advance(subrange)) { + const SerdStatement* const front = serd_cursor_get(subrange); + assert(front); + + assert(serd_statement_matches( + front, subpat[0], subpat[1], subpat[2], subpat[3])); + + ++num_sub_results; + } + serd_cursor_free(NULL, subrange); + assert(num_sub_results == N_OBJECTS_PER); + + uint64_t count = serd_model_count(model, subject, 0, 0, 0); + assert(count == num_sub_results); + + last_subject = subject; + } + serd_cursor_free(NULL, range); + + serd_nodes_free(nodes); + return 0; +} + +static SerdStatus +expected_error(void* const handle, + const SerdLogLevel level, + const size_t n_fields, + const SerdLogField* const fields, + const SerdStringView message) +{ + (void)level; + (void)n_fields; + (void)fields; + (void)handle; + + fprintf(stderr, "expected: %s\n", message.data); + return SERD_SUCCESS; +} + +ZIX_PURE_FUNC static SerdStatus +ignore_only_index_error(void* const handle, + const SerdLogLevel level, + const size_t n_fields, + const SerdLogField* const fields, + const SerdStringView message) +{ + (void)handle; + (void)level; + (void)n_fields; + (void)fields; + + const bool is_index_error = strstr(message.data, "index"); + + assert(is_index_error); + + return is_index_error ? SERD_SUCCESS : SERD_UNKNOWN_ERROR; +} + +static int +test_failed_new_alloc(SerdWorld* ignored_world, const unsigned n_quads) +{ + (void)ignored_world; + (void)n_quads; + + SerdFailingAllocator allocator = serd_failing_allocator(); + SerdWorld* const world = serd_world_new(&allocator.base); + const size_t n_world_allocs = allocator.n_allocations; + + // Successfully allocate a model to count the number of allocations + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + assert(model); + + // Test that each allocation failing is handled gracefully + const size_t n_new_allocs = allocator.n_allocations - n_world_allocs; + for (size_t i = 0; i < n_new_allocs; ++i) { + allocator.n_remaining = i; + assert(!serd_model_new(world, SERD_ORDER_SPO, 0U)); + } + + serd_model_free(model); + serd_world_free(world); + return 0; +} + +static int +test_free_null(SerdWorld* world, const unsigned n_quads) +{ + (void)world; + (void)n_quads; + + serd_model_free(NULL); // Shouldn't crash + return 0; +} + +static int +test_get_world(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + assert(serd_model_world(model) == world); + serd_model_free(model); + return 0; +} + +static int +test_get_default_order(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* const model1 = serd_model_new(world, SERD_ORDER_SPO, 0U); + SerdModel* const model2 = serd_model_new(world, SERD_ORDER_GPSO, 0U); + + assert(serd_model_default_order(model1) == SERD_ORDER_SPO); + assert(serd_model_default_order(model2) == SERD_ORDER_GPSO); + + serd_model_free(model2); + serd_model_free(model1); + return 0; +} + +static int +test_get_flags(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + const SerdModelFlags flags = SERD_STORE_GRAPHS | SERD_STORE_CARETS; + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, flags); + + assert(serd_model_flags(model) & SERD_STORE_GRAPHS); + assert(serd_model_flags(model) & SERD_STORE_CARETS); + serd_model_free(model); + return 0; +} + +static int +test_all_begin(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + SerdCursor* begin = serd_model_begin(NULL, model); + SerdCursor* first = serd_model_find(NULL, model, NULL, NULL, NULL, NULL); + + assert(serd_cursor_equals(begin, first)); + + serd_cursor_free(NULL, first); + serd_cursor_free(NULL, begin); + serd_model_free(model); + return 0; +} + +static int +test_begin_ordered(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_GRAPHS); + + assert( + !serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + + SerdCursor* i = serd_model_begin_ordered(NULL, model, SERD_ORDER_SPO); + assert(i); + assert(!serd_cursor_is_end(i)); + serd_cursor_free(NULL, i); + + i = serd_model_begin_ordered(NULL, model, SERD_ORDER_POS); + assert(serd_cursor_is_end(i)); + serd_cursor_free(NULL, i); + + serd_model_free(model); + return 0; +} + +static int +test_add_with_iterator(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + + serd_set_log_func(world, expected_error, NULL); + assert( + !serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + + // Add a statement with an active iterator + SerdCursor* iter = serd_model_begin(NULL, model); + assert( + !serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 4), 0)); + + // Check that iterator has been invalidated + assert(!serd_cursor_get(iter)); + assert(serd_cursor_advance(iter) == SERD_BAD_CURSOR); + + serd_cursor_free(NULL, iter); + serd_model_free(model); + return 0; +} + +static int +test_add_remove_nodes(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + + assert(serd_model_nodes(model)); + assert(serd_nodes_size(serd_model_nodes(model)) == 0); + + const SerdNode* const a = uri(world, 1); + const SerdNode* const b = uri(world, 2); + const SerdNode* const c = uri(world, 3); + + // Add 2 statements with 3 nodes + assert(!serd_model_add(model, a, b, a, NULL)); + assert(!serd_model_add(model, c, b, c, NULL)); + assert(serd_model_size(model) == 2); + assert(serd_nodes_size(serd_model_nodes(model)) == 3); + + // Remove one statement to leave 2 nodes + SerdCursor* const begin = serd_model_begin(NULL, model); + assert(!serd_model_erase(model, begin)); + assert(serd_model_size(model) == 1); + assert(serd_nodes_size(serd_model_nodes(model)) == 2); + serd_cursor_free(NULL, begin); + + // Clear the last statement to leave 0 nodes + assert(!serd_model_clear(model)); + assert(serd_nodes_size(serd_model_nodes(model)) == 0); + + serd_model_free(model); + return 0; +} + +static int +test_add_index(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* const model = serd_model_new(world, SERD_ORDER_SPO, 0U); + const SerdNode* const s = uri(world, 0); + const SerdNode* const p = uri(world, 1); + const SerdNode* const o1 = uri(world, 2); + const SerdNode* const o2 = uri(world, 3); + + // Try to add an existing index + assert(serd_model_add_index(model, SERD_ORDER_SPO) == SERD_FAILURE); + + // Add a couple of statements + serd_model_add(model, s, p, o1, NULL); + serd_model_add(model, s, p, o2, NULL); + assert(serd_model_size(model) == 2); + + // Add a new index + assert(!serd_model_add_index(model, SERD_ORDER_PSO)); + + // Count statements via the new index + size_t count = 0U; + SerdCursor* cur = serd_model_find(NULL, model, NULL, p, NULL, NULL); + while (!serd_cursor_is_end(cur)) { + ++count; + serd_cursor_advance(cur); + } + serd_cursor_free(NULL, cur); + + serd_model_free(model); + assert(count == 2); + return 0; +} + +static int +test_remove_index(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* const model = serd_model_new(world, SERD_ORDER_SPO, 0U); + const SerdNode* const s = uri(world, 0); + const SerdNode* const p = uri(world, 1); + const SerdNode* const o1 = uri(world, 2); + const SerdNode* const o2 = uri(world, 3); + + // Try to remove default and non-existent indices + assert(serd_model_drop_index(model, SERD_ORDER_SPO) == SERD_BAD_CALL); + assert(serd_model_drop_index(model, SERD_ORDER_PSO) == SERD_FAILURE); + + // Add a couple of statements so that dropping an index isn't trivial + serd_model_add(model, s, p, o1, NULL); + serd_model_add(model, s, p, o2, NULL); + assert(serd_model_size(model) == 2); + + assert(serd_model_add_index(model, SERD_ORDER_PSO) == SERD_SUCCESS); + assert(serd_model_drop_index(model, SERD_ORDER_PSO) == SERD_SUCCESS); + assert(serd_model_drop_index(model, SERD_ORDER_PSO) == SERD_FAILURE); + assert(serd_model_size(model) == 2); + serd_model_free(model); + return 0; +} + +static int +test_inserter(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdAllocator* const allocator = serd_default_allocator(); + SerdNodes* const nodes = serd_nodes_new(allocator); + SerdModel* const model = serd_model_new(world, SERD_ORDER_SPO, 0U); + SerdSink* const inserter = serd_inserter_new(model, NULL); + + const SerdNode* const s = + serd_nodes_get(nodes, serd_a_uri_string("http://example.org/s")); + + const SerdNode* const p = + serd_nodes_get(nodes, serd_a_uri_string("http://example.org/p")); + + const SerdNode* const rel = serd_nodes_get(nodes, serd_a_uri_string("rel")); + + serd_set_log_func(world, expected_error, NULL); + + assert(serd_sink_write(inserter, 0, s, p, rel, NULL) == SERD_BAD_DATA); + + serd_sink_free(inserter); + serd_model_free(model); + serd_nodes_free(nodes); + return 0; +} + +static int +test_erase_with_iterator(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + + serd_set_log_func(world, expected_error, NULL); + assert( + !serd_model_add(model, uri(world, 1), uri(world, 2), uri(world, 3), 0)); + assert( + !serd_model_add(model, uri(world, 4), uri(world, 5), uri(world, 6), 0)); + + // Erase a statement with an active iterator + SerdCursor* iter1 = serd_model_begin(NULL, model); + SerdCursor* iter2 = serd_model_begin(NULL, model); + assert(!serd_model_erase(model, iter1)); + + // Check that erased iterator points to the next statement + const SerdStatement* const s1 = serd_cursor_get(iter1); + assert(s1); + assert( + serd_statement_matches(s1, uri(world, 4), uri(world, 5), uri(world, 6), 0)); + + // Check that other iterator has been invalidated + assert(!serd_cursor_get(iter2)); + assert(serd_cursor_advance(iter2) == SERD_BAD_CURSOR); + + // Check that erasing the end iterator does nothing + SerdCursor* const end = + serd_cursor_copy(serd_world_allocator(world), serd_model_end(model)); + + assert(serd_model_erase(model, end) == SERD_FAILURE); + + serd_cursor_free(NULL, end); + serd_cursor_free(NULL, iter2); + serd_cursor_free(NULL, iter1); + serd_model_free(model); + return 0; +} + +static int +test_add_erase(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdAllocator* const allocator = serd_default_allocator(); + + SerdNodes* const nodes = serd_nodes_new(allocator); + SerdModel* const model = serd_model_new(world, SERD_ORDER_SPO, 0U); + + // Add (s p "hello") + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* hello = serd_nodes_get(nodes, serd_a_string("hello")); + + assert(!serd_model_add(model, s, p, hello, NULL)); + assert(serd_model_ask(model, s, p, hello, NULL)); + + // Add (s p "hi") + const SerdNode* hi = serd_nodes_get(nodes, serd_a_string("hi")); + assert(!serd_model_add(model, s, p, hi, NULL)); + assert(serd_model_ask(model, s, p, hi, NULL)); + + // Erase (s p "hi") + SerdCursor* iter = serd_model_find(NULL, model, s, p, hi, NULL); + assert(iter); + assert(!serd_model_erase(model, iter)); + assert(serd_model_size(model) == 1); + serd_cursor_free(NULL, iter); + + // Check that erased statement can not be found + SerdCursor* empty = serd_model_find(NULL, model, s, p, hi, NULL); + assert(serd_cursor_is_end(empty)); + serd_cursor_free(NULL, empty); + + serd_model_free(model); + serd_nodes_free(nodes); + return 0; +} + +static int +test_add_bad_statement(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdAllocator* const allocator = serd_world_allocator(world); + SerdNodes* const nodes = serd_nodes_new(allocator); + + const SerdNode* s = serd_nodes_get(nodes, serd_a_uri_string("urn:s")); + const SerdNode* p = serd_nodes_get(nodes, serd_a_uri_string("urn:p")); + const SerdNode* o = serd_nodes_get(nodes, serd_a_uri_string("urn:o")); + + const SerdNode* f = + serd_nodes_get(nodes, serd_a_uri_string("file:///tmp/file.ttl")); + + SerdCaret* caret = serd_caret_new(allocator, f, 16, 18); + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + + assert(!serd_model_add_with_caret(model, s, p, o, NULL, caret)); + + SerdCursor* const begin = serd_model_begin(NULL, model); + const SerdStatement* statement = serd_cursor_get(begin); + assert(statement); + + assert(serd_node_equals(serd_statement_subject(statement), s)); + assert(serd_node_equals(serd_statement_predicate(statement), p)); + assert(serd_node_equals(serd_statement_object(statement), o)); + assert(!serd_statement_graph(statement)); + + const SerdCaret* statement_caret = serd_statement_caret(statement); + assert(statement_caret); + assert(serd_node_equals(serd_caret_document(statement_caret), f)); + assert(serd_caret_line(statement_caret) == 16); + assert(serd_caret_column(statement_caret) == 18); + + assert(!serd_model_erase(model, begin)); + + serd_cursor_free(NULL, begin); + serd_model_free(model); + serd_caret_free(allocator, caret); + serd_nodes_free(nodes); + return 0; +} + +static int +test_add_with_caret(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdNodes* const nodes = serd_nodes_new(serd_world_allocator(world)); + const SerdNode* lit = serd_nodes_get(nodes, serd_a_string("string")); + const SerdNode* uri = serd_nodes_get(nodes, serd_a_uri_string("urn:uri")); + + const SerdNode* blank = + serd_nodes_get(nodes, serd_a_blank(serd_string("b1"))); + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + + assert(serd_model_add(model, lit, uri, uri, NULL)); + assert(serd_model_add(model, uri, blank, uri, NULL)); + assert(serd_model_add(model, uri, uri, uri, lit)); + + serd_model_free(model); + serd_nodes_free(nodes); + return 0; +} + +static int +test_erase_all(SerdWorld* world, const unsigned n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + serd_model_add_index(model, SERD_ORDER_OSP); + generate(world, model, n_quads, NULL); + + SerdCursor* iter = serd_model_begin(NULL, model); + while (!serd_cursor_equals(iter, serd_model_end(model))) { + assert(!serd_model_erase(model, iter)); + } + + assert(serd_model_empty(model)); + + serd_cursor_free(NULL, iter); + serd_model_free(model); + return 0; +} + +static int +test_clear(SerdWorld* world, const unsigned n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + generate(world, model, n_quads, NULL); + + serd_model_clear(model); + assert(serd_model_empty(model)); + + serd_model_free(model); + return 0; +} + +static int +test_copy(SerdWorld* world, const unsigned n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + generate(world, model, n_quads, NULL); + + SerdModel* copy = serd_model_copy(serd_world_allocator(world), model); + assert(serd_model_equals(model, copy)); + + serd_model_free(model); + serd_model_free(copy); + return 0; +} + +static int +test_equals(SerdWorld* world, const unsigned n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + generate(world, model, n_quads, NULL); + serd_model_add( + model, uri(world, 0), uri(world, 1), uri(world, 2), uri(world, 3)); + + assert(serd_model_equals(NULL, NULL)); + assert(!serd_model_equals(NULL, model)); + assert(!serd_model_equals(model, NULL)); + + SerdModel* empty = serd_model_new(world, SERD_ORDER_SPO, 0U); + assert(!serd_model_equals(model, empty)); + + SerdModel* different = serd_model_new(world, SERD_ORDER_SPO, 0U); + generate(world, different, n_quads, NULL); + serd_model_add( + different, uri(world, 1), uri(world, 1), uri(world, 2), uri(world, 3)); + + assert(serd_model_size(model) == serd_model_size(different)); + assert(!serd_model_equals(model, different)); + + serd_model_free(model); + serd_model_free(empty); + serd_model_free(different); + return 0; +} + +static int +test_find_past_end(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* o = uri(world, 3); + assert(!serd_model_add(model, s, p, o, 0)); + assert(serd_model_ask(model, s, p, o, 0)); + + const SerdNode* huge = uri(world, 999); + SerdCursor* range = serd_model_find(NULL, model, huge, huge, huge, 0); + assert(serd_cursor_is_end(range)); + + serd_cursor_free(NULL, range); + serd_model_free(model); + return 0; +} + +static int +test_find_unknown_node(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + const SerdNode* const s = uri(world, 1); + const SerdNode* const p = uri(world, 2); + const SerdNode* const o = uri(world, 3); + + SerdModel* const model = + serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_GRAPHS); + + // Add one statement + assert(!serd_model_add(model, s, p, o, NULL)); + assert(serd_model_ask(model, s, p, o, NULL)); + + /* Test searching for statements that contain a non-existent node. This is + semantically equivalent to any other non-matching pattern, but can be + implemented with a fast path that avoids searching a statement index + entirely. */ + + const SerdNode* const q = uri(world, 42); + assert(!serd_model_ask(model, s, p, o, q)); + assert(!serd_model_ask(model, s, p, q, NULL)); + assert(!serd_model_ask(model, s, q, o, NULL)); + assert(!serd_model_ask(model, q, p, o, NULL)); + + serd_model_free(model); + return 0; +} + +static int +test_find_graph(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + const SerdNode* const s = uri(world, 1); + const SerdNode* const p = uri(world, 2); + const SerdNode* const o1 = uri(world, 3); + const SerdNode* const o2 = uri(world, 4); + const SerdNode* const g = uri(world, 5); + + for (unsigned indexed = 0U; indexed < 2U; ++indexed) { + SerdModel* const model = + serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_GRAPHS); + + if (indexed) { + serd_model_add_index(model, SERD_ORDER_GSPO); + } + + // Add one statement in a named graph and one in the default graph + assert(!serd_model_add(model, s, p, o1, NULL)); + assert(!serd_model_add(model, s, p, o2, g)); + + // Both statements can be found in the default graph + assert(serd_model_ask(model, s, p, o1, NULL)); + assert(serd_model_ask(model, s, p, o2, NULL)); + + // Only the one statement can be found in the named graph + assert(!serd_model_ask(model, s, p, o1, g)); + assert(serd_model_ask(model, s, p, o2, g)); + + serd_model_free(model); + } + + return 0; +} + +static int +test_range(SerdWorld* world, const unsigned n_quads) +{ + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + generate(world, model, n_quads, NULL); + + SerdCursor* range1 = serd_model_begin(NULL, model); + SerdCursor* range2 = serd_model_begin(NULL, model); + + assert(!serd_cursor_is_end(range1)); + assert(serd_cursor_is_end(NULL)); + + assert(serd_cursor_equals(NULL, NULL)); + assert(!serd_cursor_equals(range1, NULL)); + assert(!serd_cursor_equals(NULL, range1)); + assert(serd_cursor_equals(range1, range2)); + + assert(!serd_cursor_advance(range2)); + assert(!serd_cursor_equals(range1, range2)); + + serd_cursor_free(NULL, range2); + serd_cursor_free(NULL, range1); + serd_model_free(model); + + return 0; +} + +static int +test_triple_index_read(SerdWorld* world, const unsigned n_quads) +{ + serd_set_log_func(world, ignore_only_index_error, NULL); + + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = serd_model_new(world, (SerdStatementOrder)i, 0U); + generate(world, model, n_quads, 0); + assert(!test_read(world, model, 0, n_quads)); + serd_model_free(model); + } + + return 0; +} + +static int +test_quad_index_read(SerdWorld* world, const unsigned n_quads) +{ + serd_set_log_func(world, ignore_only_index_error, NULL); + + for (unsigned i = 0; i < 6; ++i) { + SerdModel* model = + serd_model_new(world, (SerdStatementOrder)i, SERD_STORE_GRAPHS); + + const SerdNode* graph = uri(world, 42); + generate(world, model, n_quads, graph); + assert(!test_read(world, model, graph, n_quads)); + serd_model_free(model); + } + + return 0; +} + +static int +test_remove_graph(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdModel* model = serd_model_new(world, SERD_ORDER_GSPO, SERD_STORE_GRAPHS); + + // Generate a couple of graphs + const SerdNode* graph42 = uri(world, 42); + const SerdNode* graph43 = uri(world, 43); + generate(world, model, 1, graph42); + generate(world, model, 1, graph43); + + // Find the start of graph43 + SerdCursor* range = serd_model_find(NULL, model, NULL, NULL, NULL, graph43); + assert(range); + + // Remove the entire range of statements in the graph + SerdStatus st = serd_model_erase_statements(model, range); + assert(!st); + serd_cursor_free(NULL, range); + + // Erase the first tuple (an element in the default graph) + SerdCursor* iter = serd_model_begin(NULL, model); + assert(!serd_model_erase(model, iter)); + serd_cursor_free(NULL, iter); + + // Ensure only the other graph is left + Quad pat = {0, 0, 0, graph42}; + for (iter = serd_model_begin(NULL, model); + !serd_cursor_equals(iter, serd_model_end(model)); + serd_cursor_advance(iter)) { + const SerdStatement* const s = serd_cursor_get(iter); + assert(s); + assert(serd_statement_matches(s, pat[0], pat[1], pat[2], pat[3])); + } + serd_cursor_free(NULL, iter); + + serd_model_free(model); + return 0; +} + +static int +test_default_graph(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + const SerdNode* s = uri(world, 1); + const SerdNode* p = uri(world, 2); + const SerdNode* o = uri(world, 3); + const SerdNode* g1 = uri(world, 101); + const SerdNode* g2 = uri(world, 102); + + { + // Make a model that does not store graphs + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + + // Insert a statement into a graph (which will be dropped) + assert(!serd_model_add(model, s, p, o, g1)); + + // Attempt to insert the same statement into another graph + assert(serd_model_add(model, s, p, o, g2) == SERD_FAILURE); + + // Ensure that we only see the statement once + assert(serd_model_count(model, s, p, o, NULL) == 1); + + serd_model_free(model); + } + + { + // Make a model that stores graphs + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_GRAPHS); + + // Insert the same statement into two graphs + assert(!serd_model_add(model, s, p, o, g1)); + assert(!serd_model_add(model, s, p, o, g2)); + + // Ensure we see the statement twice + assert(serd_model_count(model, s, p, o, NULL) == 2); + + serd_model_free(model); + } + + return 0; +} + +static int +test_write_flat_range(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdAllocator* const alloc = serd_world_allocator(world); + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_GRAPHS); + SerdNodes* nodes = serd_nodes_new(alloc); + + const SerdNode* s = serd_nodes_get(nodes, serd_a_uri_string("urn:s")); + const SerdNode* p = serd_nodes_get(nodes, serd_a_uri_string("urn:p")); + const SerdNode* b1 = serd_nodes_get(nodes, serd_a_blank(serd_string("b1"))); + const SerdNode* b2 = serd_nodes_get(nodes, serd_a_blank(serd_string("b2"))); + const SerdNode* o = serd_nodes_get(nodes, serd_a_uri_string("urn:o")); + + serd_model_add(model, s, p, b1, NULL); + serd_model_add(model, b1, p, o, NULL); + serd_model_add(model, s, p, b2, NULL); + serd_model_add(model, b2, p, o, NULL); + + SerdBuffer buffer = {NULL, NULL, 0}; + SerdEnv* env = serd_env_new(alloc, serd_empty_string()); + SerdOutputStream out = serd_open_output_buffer(&buffer); + + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, &out, 1); + assert(writer); + + SerdCursor* all = serd_model_begin(NULL, model); + for (const SerdStatement* t = NULL; (t = serd_cursor_get(all)); + serd_cursor_advance(all)) { + serd_sink_write_statement(serd_writer_sink(writer), 0U, t); + } + serd_cursor_free(NULL, all); + + serd_writer_finish(writer); + serd_close_output(&out); + + const char* const str = (const char*)buffer.buf; + static const char* const expected = "<urn:s>\n" + "\t<urn:p> _:b1 ,\n" + "\t\t_:b2 .\n" + "\n" + "_:b1\n" + "\t<urn:p> <urn:o> .\n" + "\n" + "_:b2\n" + "\t<urn:p> <urn:o> .\n"; + + assert(str); + assert(!strcmp(str, expected)); + + serd_free(NULL, buffer.buf); + serd_writer_free(writer); + serd_model_free(model); + serd_env_free(env); + serd_nodes_free(nodes); + return 0; +} + +static int +test_write_bad_list(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdAllocator* const alloc = serd_world_allocator(world); + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_GRAPHS); + SerdNodes* nodes = serd_nodes_new(alloc); + + serd_model_add_index(model, SERD_ORDER_OPS); + + const SerdNode* s = serd_nodes_get(nodes, serd_a_uri_string("urn:s")); + const SerdNode* p = serd_nodes_get(nodes, serd_a_uri_string("urn:p")); + + const SerdNode* list1 = + serd_nodes_get(nodes, serd_a_blank(serd_string("l1"))); + + const SerdNode* list2 = + serd_nodes_get(nodes, serd_a_blank(serd_string("l2"))); + + const SerdNode* nofirst = + serd_nodes_get(nodes, serd_a_blank(serd_string("nof"))); + + const SerdNode* norest = + serd_nodes_get(nodes, serd_a_blank(serd_string("nor"))); + + const SerdNode* pfirst = serd_nodes_get(nodes, serd_a_uri_string(RDF_FIRST)); + const SerdNode* prest = serd_nodes_get(nodes, serd_a_uri_string(RDF_REST)); + + const SerdNode* val1 = serd_nodes_get(nodes, serd_a_string("a")); + const SerdNode* val2 = serd_nodes_get(nodes, serd_a_string("b")); + + // List where second node has no rdf:first + serd_model_add(model, s, p, list1, NULL); + serd_model_add(model, list1, pfirst, val1, NULL); + serd_model_add(model, list1, prest, nofirst, NULL); + + // List where second node has no rdf:rest + serd_model_add(model, s, p, list2, NULL); + serd_model_add(model, list2, pfirst, val1, NULL); + serd_model_add(model, list2, prest, norest, NULL); + serd_model_add(model, norest, pfirst, val2, NULL); + + SerdBuffer buffer = {NULL, NULL, 0}; + SerdEnv* env = serd_env_new(alloc, serd_empty_string()); + SerdOutputStream out = serd_open_output_buffer(&buffer); + + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, &out, 1); + assert(writer); + + SerdCursor* all = serd_model_begin(NULL, model); + serd_describe_range(NULL, all, serd_writer_sink(writer), 0); + serd_cursor_free(NULL, all); + + serd_writer_finish(writer); + serd_close_output(&out); + + const char* str = (const char*)buffer.buf; + const char* expected = "<urn:s>\n" + " <urn:p> (\n" + " \"a\"\n" + " ) , (\n" + " \"a\"\n" + " \"b\"\n" + " ) .\n"; + + assert(str); + assert(!strcmp(str, expected)); + + serd_free(NULL, buffer.buf); + serd_writer_free(writer); + serd_close_output(&out); + serd_model_free(model); + serd_env_free(env); + serd_nodes_free(nodes); + return 0; +} + +static int +test_write_infinite_list(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdAllocator* const alloc = serd_world_allocator(world); + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_GRAPHS); + SerdNodes* nodes = serd_nodes_new(alloc); + + serd_model_add_index(model, SERD_ORDER_OPS); + + const SerdNode* s = serd_nodes_get(nodes, serd_a_uri_string("urn:s")); + const SerdNode* p = serd_nodes_get(nodes, serd_a_uri_string("urn:p")); + + const SerdNode* list1 = + serd_nodes_get(nodes, serd_a_blank(serd_string("l1"))); + + const SerdNode* list2 = + serd_nodes_get(nodes, serd_a_blank(serd_string("l2"))); + + const SerdNode* pfirst = serd_nodes_get(nodes, serd_a_uri_string(RDF_FIRST)); + const SerdNode* prest = serd_nodes_get(nodes, serd_a_uri_string(RDF_REST)); + const SerdNode* val1 = serd_nodes_get(nodes, serd_a_string("a")); + const SerdNode* val2 = serd_nodes_get(nodes, serd_a_string("b")); + + // List with a cycle: list1 -> list2 -> list1 -> list2 ... + serd_model_add(model, s, p, list1, NULL); + serd_model_add(model, list1, pfirst, val1, NULL); + serd_model_add(model, list1, prest, list2, NULL); + serd_model_add(model, list2, pfirst, val2, NULL); + serd_model_add(model, list2, prest, list1, NULL); + + SerdBuffer buffer = {NULL, NULL, 0}; + SerdEnv* env = serd_env_new(alloc, serd_empty_string()); + SerdOutputStream out = serd_open_output_buffer(&buffer); + + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, &out, 1); + assert(writer); + + serd_env_set_prefix( + env, + serd_string("rdf"), + serd_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#")); + + SerdCursor* all = serd_model_begin(NULL, model); + serd_describe_range(NULL, all, serd_writer_sink(writer), 0); + serd_cursor_free(NULL, all); + + serd_writer_finish(writer); + serd_close_output(&out); + const char* str = (const char*)buffer.buf; + const char* expected = "<urn:s>\n" + " <urn:p> _:l1 .\n" + "\n" + "_:l1\n" + " rdf:first \"a\" ;\n" + " rdf:rest [\n" + " rdf:first \"b\" ;\n" + " rdf:rest _:l1\n" + " ] .\n"; + + assert(str); + assert(!strcmp(str, expected)); + + serd_free(NULL, buffer.buf); + serd_writer_free(writer); + serd_close_output(&out); + serd_model_free(model); + serd_env_free(env); + serd_nodes_free(nodes); + + return 0; +} + +typedef struct { + size_t n_written; + size_t max_successes; +} FailingWriteFuncState; + +/// Write function that fails after a certain number of writes +static size_t +failing_write_func(const void* buf, size_t size, size_t nmemb, void* stream) +{ + (void)buf; + (void)size; + (void)nmemb; + + FailingWriteFuncState* state = (FailingWriteFuncState*)stream; + + return (++state->n_written > state->max_successes) ? 0 : nmemb; +} + +static int +test_write_error_in_list_subject(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdAllocator* const alloc = serd_world_allocator(world); + + serd_set_log_func(world, expected_error, NULL); + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + SerdNodes* nodes = serd_nodes_new(alloc); + + serd_model_add_index(model, SERD_ORDER_OPS); + + const SerdNode* p = serd_nodes_get(nodes, serd_a_uri_string("urn:p")); + const SerdNode* o = serd_nodes_get(nodes, serd_a_uri_string("urn:o")); + const SerdNode* l1 = serd_nodes_get(nodes, serd_a_blank(serd_string("l1"))); + const SerdNode* one = serd_nodes_get(nodes, serd_a_integer(1)); + const SerdNode* l2 = serd_nodes_get(nodes, serd_a_blank(serd_string("l2"))); + const SerdNode* two = serd_nodes_get(nodes, serd_a_integer(2)); + + const SerdNode* rdf_first = + serd_nodes_get(nodes, serd_a_uri_string(RDF_FIRST)); + + const SerdNode* rdf_rest = serd_nodes_get(nodes, serd_a_uri_string(RDF_REST)); + + const SerdNode* rdf_nil = serd_nodes_get(nodes, serd_a_uri_string(RDF_NIL)); + + serd_model_add(model, l1, rdf_first, one, NULL); + serd_model_add(model, l1, rdf_rest, l2, NULL); + serd_model_add(model, l2, rdf_first, two, NULL); + serd_model_add(model, l2, rdf_rest, rdf_nil, NULL); + serd_model_add(model, l1, p, o, NULL); + + SerdEnv* env = serd_env_new(alloc, serd_empty_string()); + + for (size_t max_successes = 0; max_successes < 18; ++max_successes) { + FailingWriteFuncState state = {0, max_successes}; + SerdOutputStream out = + serd_open_output_stream(failing_write_func, NULL, NULL, &state); + + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, &out, 1); + + const SerdSink* const sink = serd_writer_sink(writer); + SerdCursor* const all = serd_model_begin(NULL, model); + const SerdStatus st = serd_describe_range(NULL, all, sink, 0); + serd_cursor_free(NULL, all); + + assert(st == SERD_BAD_WRITE); + + serd_writer_free(writer); + serd_close_output(&out); + } + + serd_env_free(env); + serd_model_free(model); + serd_nodes_free(nodes); + return 0; +} + +static int +test_write_error_in_list_object(SerdWorld* world, const unsigned n_quads) +{ + (void)n_quads; + + SerdAllocator* const alloc = serd_world_allocator(world); + + serd_set_log_func(world, expected_error, NULL); + + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0U); + SerdNodes* nodes = serd_nodes_new(alloc); + + serd_model_add_index(model, SERD_ORDER_OPS); + + const SerdNode* s = serd_nodes_get(nodes, serd_a_uri_string("urn:s")); + const SerdNode* p = serd_nodes_get(nodes, serd_a_uri_string("urn:p")); + const SerdNode* l1 = serd_nodes_get(nodes, serd_a_blank(serd_string("l1"))); + const SerdNode* one = serd_nodes_get(nodes, serd_a_integer(1)); + const SerdNode* l2 = serd_nodes_get(nodes, serd_a_blank(serd_string("l2"))); + const SerdNode* two = serd_nodes_get(nodes, serd_a_integer(2)); + + const SerdNode* rdf_first = + serd_nodes_get(nodes, serd_a_uri_string(RDF_FIRST)); + + const SerdNode* rdf_rest = serd_nodes_get(nodes, serd_a_uri_string(RDF_REST)); + + const SerdNode* rdf_nil = serd_nodes_get(nodes, serd_a_uri_string(RDF_NIL)); + + serd_model_add(model, s, p, l1, NULL); + serd_model_add(model, l1, rdf_first, one, NULL); + serd_model_add(model, l1, rdf_rest, l2, NULL); + serd_model_add(model, l2, rdf_first, two, NULL); + serd_model_add(model, l2, rdf_rest, rdf_nil, NULL); + + SerdEnv* env = serd_env_new(alloc, serd_empty_string()); + + for (size_t max_successes = 0; max_successes < 21; ++max_successes) { + FailingWriteFuncState state = {0, max_successes}; + SerdOutputStream out = + serd_open_output_stream(failing_write_func, NULL, NULL, &state); + + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, &out, 1); + + const SerdSink* const sink = serd_writer_sink(writer); + SerdCursor* const all = serd_model_begin(NULL, model); + const SerdStatus st = serd_describe_range(NULL, all, sink, 0); + serd_cursor_free(NULL, all); + + assert(st == SERD_BAD_WRITE); + + serd_writer_free(writer); + serd_close_output(&out); + } + + serd_env_free(env); + serd_model_free(model); + serd_nodes_free(nodes); + return 0; +} + +int +main(void) +{ + static const unsigned n_quads = 300; + + serd_model_free(NULL); // Shouldn't crash + + typedef int (*TestFunc)(SerdWorld*, unsigned); + + const TestFunc tests[] = {test_failed_new_alloc, + test_free_null, + test_get_world, + test_get_default_order, + test_get_flags, + test_all_begin, + test_begin_ordered, + test_add_with_iterator, + test_add_remove_nodes, + test_add_index, + test_remove_index, + test_inserter, + test_erase_with_iterator, + test_add_erase, + test_add_bad_statement, + test_add_with_caret, + test_erase_all, + test_clear, + test_copy, + test_equals, + test_find_past_end, + test_find_unknown_node, + test_find_graph, + test_range, + test_triple_index_read, + test_quad_index_read, + test_remove_graph, + test_default_graph, + test_write_flat_range, + test_write_bad_list, + test_write_infinite_list, + test_write_error_in_list_subject, + test_write_error_in_list_object, + NULL}; + + SerdWorld* world = serd_world_new(NULL); + int ret = 0; + + for (const TestFunc* t = tests; *t; ++t) { + serd_set_log_func(world, NULL, NULL); + ret += (*t)(world, n_quads); + } + + serd_world_free(world); + return ret; +} diff --git a/test/test_sort.py b/test/test_sort.py new file mode 100755 index 00000000..78147fbc --- /dev/null +++ b/test/test_sort.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 + +# Copyright 2022-2023 David Robillard <d@drobilla.net> +# SPDX-License-Identifier: ISC + +"""Run the collation tests for serd-sort.""" + +import os +import shlex +import subprocess +import sys + +import serd_test_util as util + +collations = [ + "GOPS", + "GOSP", + "GPSO", + "GSOP", + "GSPO", + "OPS", + "OSP", + "POS", + "PSO", + "SOP", + "SPO", + "pretty", +] + + +def run_sort_test(command, in_path, good_path): + """Sort a single input in the named order and check the output. + + The expected output is assumed to exist at test_dir/NAME.untyped.nq. + """ + + result_name = os.path.basename(good_path) + options = [] + if result_name not in ["pretty.nq", "untyped.nq"]: + options += ["-c", os.path.splitext(result_name)[0]] + + command = command + options + [in_path] + + proc = subprocess.run( + command, check=True, encoding="utf-8", capture_output=True + ) + + lines = proc.stdout.splitlines(True) + with open(good_path, "r", encoding="utf-8") as good: + return util.lines_equal(list(good), lines, good_path, result_name) + + +def run_tests(test_dir, command): + """Run all the tests in the suite.""" + + n_failures = 0 + in_path = os.path.join(test_dir, "input.trig") + + # Test all the basic collations, and "pretty" with type first + for name in collations: + good_path = os.path.join(test_dir, name + ".nq") + prefixes = [command, command + ["-I", "trig"]] + for prefix in prefixes: + if not run_sort_test(prefix, in_path, good_path): + n_failures += 1 + + # Test "pretty" without type first + if not run_sort_test( + command + ["-O", "longhand"], + in_path, + os.path.join(test_dir, "untyped.nq"), + ): + n_failures += 1 + + return n_failures + + +def main(): + """Run the command line tool.""" + + args = util.wrapper_args(__doc__, True) + wrapper_prefix = shlex.split(args.wrapper) + command_prefix = wrapper_prefix + [args.tool] + + return run_tests(os.path.dirname(args.input), command_prefix) + + +if __name__ == "__main__": + try: + sys.exit(main()) + except subprocess.CalledProcessError as error: + if error.stderr is not None: + sys.stderr.write(error.stderr) + + sys.stderr.write(sys.argv[0]) + sys.stderr.write(": error: ") + sys.stderr.write(str(error)) + sys.stderr.write("\n") + sys.exit(error.returncode) diff --git a/test/test_string.c b/test/test_string.c index 2d16936a..286df8ca 100644 --- a/test/test_string.c +++ b/test/test_string.c @@ -14,7 +14,7 @@ test_strerror(void) { const char* msg = serd_strerror(SERD_SUCCESS); assert(!strcmp(msg, "Success")); - for (int i = SERD_FAILURE; i <= SERD_BAD_PATTERN; ++i) { + for (int i = SERD_FAILURE; i <= SERD_BAD_INDEX; ++i) { msg = serd_strerror((SerdStatus)i); assert(strcmp(msg, "Success")); } diff --git a/tools/console.c b/tools/console.c index 0f66a2f1..0aef9c01 100644 --- a/tools/console.c +++ b/tools/console.c @@ -31,6 +31,23 @@ #define MAX_DEPTH 128U +typedef struct LogLevelLabels { + const char* number; + const char* symbol; + const char* name; +} LogLevelLabels; + +static const LogLevelLabels log_level_strings[] = { + {"0", "emerg", "emergency"}, + {"1", "alert", NULL}, + {"2", "crit", "critical"}, + {"3", "err", "error"}, + {"4", "warn", "warning"}, + {"5", "note", "notice"}, + {"6", "info", NULL}, + {"7", "debug", NULL}, +}; + ZIX_PURE_FUNC bool serd_option_iter_is_end(const OptionIter iter) { @@ -49,6 +66,21 @@ serd_option_iter_advance(OptionIter* const iter) return SERD_SUCCESS; } +SerdCommonOptions +serd_default_options(void) +{ + const SerdCommonOptions opts = { + "", + NULL, + 4096U, + 1048576U, + {SERD_SYNTAX_EMPTY, 0U, false}, + {SERD_SYNTAX_EMPTY, 0U, false}, + SERD_LOG_LEVEL_NOTICE, + }; + return opts; +} + SerdStatus serd_tool_setup(SerdTool* const tool, const char* const program, @@ -336,6 +368,28 @@ serd_parse_output_argument(OptionIter* const iter, return st; } +static SerdStatus +serd_parse_log_level_argument(OptionIter* const iter, + SerdLogLevel* const log_level) +{ + SerdStatus st = SERD_SUCCESS; + const char* argument = NULL; + + if (!(st = serd_get_argument(iter, &argument))) { + fprintf(stderr, "LOG LEVEL: %s\n", argument); + for (unsigned i = 0U; i < (unsigned)SERD_LOG_LEVEL_DEBUG; ++i) { + const LogLevelLabels* const labels = &log_level_strings[i]; + if (!strcmp(argument, labels->number) || + !strcmp(argument, labels->symbol) || + (labels->name && !strcmp(argument, labels->name))) { + *log_level = (SerdLogLevel)i; + } + } + } + + return st; +} + SerdStatus serd_parse_common_option(OptionIter* const iter, SerdCommonOptions* const opts) { @@ -359,6 +413,9 @@ serd_parse_common_option(OptionIter* const iter, SerdCommonOptions* const opts) case 'o': return serd_get_argument(iter, &opts->out_filename); + case 'l': + return serd_parse_log_level_argument(iter, &opts->log_level); + default: break; } diff --git a/tools/console.h b/tools/console.h index d475aebc..c8c68411 100644 --- a/tools/console.h +++ b/tools/console.h @@ -6,6 +6,7 @@ #include "serd/env.h" #include "serd/input_stream.h" +#include "serd/log.h" #include "serd/memory.h" #include "serd/output_stream.h" #include "serd/reader.h" @@ -43,6 +44,7 @@ typedef struct { size_t stack_size; SerdSyntaxOptions input; SerdSyntaxOptions output; + SerdLogLevel log_level; } SerdCommonOptions; // Common "global" state of a command-line tool that writes data @@ -59,6 +61,9 @@ serd_option_iter_is_end(OptionIter iter); SerdStatus serd_option_iter_advance(OptionIter* iter); +SerdCommonOptions +serd_default_options(void); + SerdStatus serd_tool_setup(SerdTool* tool, const char* program, SerdCommonOptions options); diff --git a/tools/meson.build b/tools/meson.build index 43902c74..af47f217 100644 --- a/tools/meson.build +++ b/tools/meson.build @@ -26,5 +26,15 @@ serd_pipe = executable( link_args: tool_link_args, ) +serd_sort = executable( + 'serd-sort', + files('console.c', 'serd-sort.c'), + c_args: tool_c_args, + dependencies: [serd_dep, zix_dep], + install: true, + link_args: tool_link_args, +) + meson.override_find_program('serd-filter', serd_filter) meson.override_find_program('serd-pipe', serd_pipe) +meson.override_find_program('serd-sort', serd_sort) diff --git a/tools/serd-filter.c b/tools/serd-filter.c index 01834e5a..70d7b68c 100644 --- a/tools/serd-filter.c +++ b/tools/serd-filter.c @@ -274,17 +274,7 @@ main(int argc, char** argv) char default_input[] = "-"; char* default_inputs[] = {default_input}; - Options opts = {{"", - NULL, - 4096U, - 1048576U, - {SERD_SYNTAX_EMPTY, 0U, false}, - {SERD_NQUADS, 0U, false}}, - NULL, - NULL, - NULL, - 0U, - false}; + Options opts = {serd_default_options(), NULL, NULL, NULL, 0U, false}; // Parse all command line options (which must precede inputs) SerdStatus st = SERD_SUCCESS; diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c index bbed9fa8..fb1586b4 100644 --- a/tools/serd-pipe.c +++ b/tools/serd-pipe.c @@ -108,8 +108,8 @@ print_usage(const char* const name, const bool error) " -b BYTES I/O block size.\n" " -h Display this help and exit.\n" " -k BYTES Parser stack size.\n" + " -l LEVEL Maximum log level: 0 to 7, or emerg to debug.\n" " -o FILENAME Write output to FILENAME instead of stdout.\n" - " -q Suppress warning and error output.\n" " -s STRING Parse STRING as input.\n"; FILE* const os = error ? stderr : stdout; @@ -179,18 +179,7 @@ main(const int argc, char* const* const argv) char default_input[] = {'-', '\0'}; char* default_inputs[] = {default_input}; - Options opts = {{"", - NULL, - 4096U, - 1048576U, - {SERD_SYNTAX_EMPTY, 0U, false}, - {SERD_SYNTAX_EMPTY, 0U, false}}, - "", - NULL, - NULL, - 0U, - false, - false}; + Options opts = {serd_default_options(), "", NULL, NULL, 0U, false, false}; // Parse all command line options (which must precede inputs) SerdStatus st = SERD_SUCCESS; diff --git a/tools/serd-sort.c b/tools/serd-sort.c new file mode 100644 index 00000000..3b9c829a --- /dev/null +++ b/tools/serd-sort.c @@ -0,0 +1,267 @@ +// Copyright 2011-2023 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#include "console.h" + +#include "serd/cursor.h" +#include "serd/describe.h" +#include "serd/env.h" +#include "serd/inserter.h" +#include "serd/model.h" +#include "serd/reader.h" +#include "serd/sink.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "serd/syntax.h" +#include "serd/writer.h" +#include "zix/attributes.h" + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +/* Application (after parsing command-line arguments) */ + +// All options +typedef struct { + SerdCommonOptions common; + const char* collation; + char* const* inputs; + intptr_t n_inputs; + SerdStatementOrder order; + SerdDescribeFlags flags; +} Options; + +ZIX_PURE_FUNC static bool +input_has_graphs(const Options opts) +{ + if (opts.common.input.syntax) { + return serd_syntax_has_graphs(opts.common.input.syntax); + } + + for (intptr_t i = 0U; i < opts.n_inputs; ++i) { + if (serd_syntax_has_graphs(serd_guess_syntax(opts.inputs[i]))) { + return true; + } + } + + return false; +} + +// Run the tool using the given options +static SerdStatus +run(const Options opts) +{ + SerdTool app = {{NULL, NULL, NULL, NULL}, NULL, NULL, NULL}; + + // Set up the writing environment + SerdStatus st = SERD_SUCCESS; + if ((st = serd_tool_setup(&app, "serd-sort", opts.common))) { + serd_tool_cleanup(app); + return st; + } + + // Determine the default order to store statements in the model + const bool with_graphs = input_has_graphs(opts); + const SerdStatementOrder default_order = opts.collation ? opts.order + : with_graphs ? SERD_ORDER_GSPO + : SERD_ORDER_SPO; + + const SerdModelFlags flags = + (SerdModelFlags)(with_graphs * SERD_STORE_GRAPHS); + + SerdModel* const model = serd_model_new(app.world, default_order, flags); + + if (!opts.collation) { + // If we are pretty-printing, we need an O** index + serd_model_add_index(model, SERD_ORDER_OPS); + + if (with_graphs) { + // If we have graphs we still need the SPO index for finding subjects + serd_model_add_index(model, SERD_ORDER_SPO); + } + } + + // Read all the inputs into an inserter to load the model + SerdSink* const inserter = serd_inserter_new(model, NULL); + if (st || (st = serd_read_inputs(app.world, + opts.common, + app.env, + opts.n_inputs, + opts.inputs, + inserter))) { + serd_sink_free(inserter); + serd_model_free(model); + serd_tool_cleanup(app); + return st; + } + + // Write the model to the output + const SerdSink* const target = serd_writer_sink(app.writer); + if (opts.collation) { + SerdCursor* const cursor = + serd_model_begin_ordered(NULL, model, opts.order); + + st = serd_env_write_prefixes(app.env, target); + + for (const SerdStatement* statement = NULL; + !st && (statement = serd_cursor_get(cursor)); + serd_cursor_advance(cursor)) { + st = serd_sink_write_statement(target, 0U, statement); + } + + serd_cursor_free(NULL, cursor); + } else { + SerdCursor* const cursor = serd_model_begin(NULL, model); + + if (!(st = serd_env_write_prefixes(app.env, target))) { + st = serd_describe_range(NULL, cursor, target, opts.flags); + } + + serd_cursor_free(NULL, cursor); + } + + if (!st) { + st = serd_writer_finish(app.writer); + } + + serd_sink_free(inserter); + serd_model_free(model); + + const SerdStatus cst = serd_tool_cleanup(app); + return st ? st : cst; +} + +/* Command-line interface (before setting up serd) */ + +static SerdStatus +parse_statement_order(const char* const string, SerdStatementOrder* const order) +{ + static const char* const strings[] = {"SPO", + "SOP", + "OPS", + "OSP", + "PSO", + "POS", + "GSPO", + "GSOP", + "GOPS", + "GOSP", + "GPSO", + "GPOS", + NULL}; + + for (unsigned i = 0; strings[i]; ++i) { + if (!strcmp(string, strings[i])) { + *order = (SerdStatementOrder)i; + return SERD_SUCCESS; + } + } + + return SERD_BAD_ARG; +} + +static int +print_usage(const char* const name, const bool error) +{ + static const char* const description = + "Reorder RDF data by loading everything into a model then writing it.\n" + "INPUT can be a local filename, or \"-\" to read from standard input.\n\n" + " -B BASE_URI Base URI or path for resolving relative references.\n" + " -I SYNTAX Input syntax turtle/ntriples/trig/nquads, or option\n" + " lax/variables/relative/global/generated.\n" + " -O SYNTAX Output syntax empty/turtle/ntriples/nquads, or option\n" + " ascii/expanded/verbatim/terse/lax.\n" + " -V Display version information and exit.\n" + " -b BYTES I/O block size.\n" + " -c COLLATION An optional \"G\" then the letters \"SPO\" in any order.\n" + " -h Display this help and exit.\n" + " -k BYTES Parser stack size.\n" + " -o FILENAME Write output to FILENAME instead of stdout.\n" + " -t Do not write type as \"a\" before other properties.\n"; + + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... [INPUT]...\n", name); + fprintf(os, "%s", description); + return error; +} + +// Parse the option pointed to by `iter`, and advance it to the next one +static SerdStatus +parse_option(OptionIter* const iter, Options* const opts) +{ +#define ARG_ERRORF(fmt, ...) \ + fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__) + + SerdStatus st = serd_parse_common_option(iter, &opts->common); + if (st != SERD_FAILURE) { + return st; + } + + const char opt = iter->argv[iter->a][iter->f]; + switch (opt) { + case 'V': + return serd_print_version("serd-sort"); + + case 'c': + if (!(st = serd_get_argument(iter, &opts->collation))) { + if ((st = parse_statement_order(opts->collation, &opts->order))) { + ARG_ERRORF("unknown collation \"%s\"\n", opts->collation); + return st; + } + } + return st; + + case 'h': + print_usage(iter->argv[0], false); + return SERD_FAILURE; + + default: + break; + } + + ARG_ERRORF("invalid option -- '%c'\n", opt); + return SERD_BAD_ARG; + +#undef ARG_ERRORF +} + +int +main(const int argc, char* const* const argv) +{ + char default_input[] = "-"; + char* default_inputs[] = {default_input}; + + Options opts = {serd_default_options(), NULL, NULL, 0U, SERD_ORDER_SPO, 0U}; + + // Parse all command line options (which must precede inputs) + SerdStatus st = SERD_SUCCESS; + OptionIter iter = {argv, argc, 1, 1}; + while (!serd_option_iter_is_end(iter)) { + if ((st = parse_option(&iter, &opts))) { + return (st == SERD_FAILURE) ? 0 : print_usage(argv[0], true); + } + } + + // Order statements to match longhand mode if necessary + if (opts.common.output.flags & SERD_WRITE_LONGHAND) { + opts.flags |= SERD_NO_TYPE_FIRST; + } + + // Every argument past the last option is an input + opts.inputs = argv + iter.a; + opts.n_inputs = argc - iter.a; + if (opts.n_inputs == 0) { + opts.n_inputs = 1; + opts.inputs = default_inputs; + } + + // Don't add prefixes to blank node labels if there is only one input + if (opts.n_inputs == 1) { + opts.common.input.flags |= SERD_READ_GLOBAL; + } + + return run(opts) > SERD_FAILURE; +} |