diff options
author | David Robillard <d@drobilla.net> | 2023-03-31 17:17:41 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:08 -0500 |
commit | b5956c4dc6b065d664908104d5fc6752a87e3364 (patch) | |
tree | 6be1fa515891e759092bb9bea082e27c78bfb6de /src/describe.c | |
parent | 439d6ec3d6dfbea74334beace790f500e61c9b7d (diff) | |
download | serd-b5956c4dc6b065d664908104d5fc6752a87e3364.tar.gz serd-b5956c4dc6b065d664908104d5fc6752a87e3364.tar.bz2 serd-b5956c4dc6b065d664908104d5fc6752a87e3364.zip |
Add model and serd-sort utility
With all the new functionality, the complexity of the serd-pipe command-line
interface is starting to push the limits of available flags. So, instead of
grafting on further options to control a model, this commit adds a new tool,
serd-sort, which acts somewhat like a stripped-down serd-pipe that stores
statements in a model in memory.
This keeps the complexity (including the user-facing complexity) of any one
tool down, since other more focused tools can be used for streaming tasks in a
pipeline.
In other words, abandon Swissarmyknifeism, take a page from the Unix
philosophy, and try to expose the model functionality to the command-line in a
dedicated focused tool. The model implementation is tested by using this tool
to run a subset of the usual test suites, and a special suite to test statement
sorting.
Diffstat (limited to 'src/describe.c')
-rw-r--r-- | src/describe.c | 314 |
1 files changed, 314 insertions, 0 deletions
diff --git a/src/describe.c b/src/describe.c new file mode 100644 index 00000000..a7012cf9 --- /dev/null +++ b/src/describe.c @@ -0,0 +1,314 @@ +// Copyright 2011-2023 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#include "cursor.h" +#include "model.h" +#include "world.h" + +// Define the types used in the hash interface for more type safety +#define ZIX_HASH_KEY_TYPE const SerdNode +#define ZIX_HASH_RECORD_TYPE const SerdNode + +#include "serd/cursor.h" +#include "serd/describe.h" +#include "serd/memory.h" +#include "serd/model.h" +#include "serd/node.h" +#include "serd/sink.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "serd/world.h" +#include "zix/allocator.h" +#include "zix/attributes.h" +#include "zix/digest.h" +#include "zix/hash.h" +#include "zix/status.h" + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> + +typedef enum { NAMED, ANON_S, ANON_O, LIST_S, LIST_O } NodeStyle; + +typedef struct { + SerdAllocator* allocator; // Allocator for auxiliary structures + const SerdModel* model; // Model to read from + const SerdSink* sink; // Sink to write description to + ZixHash* list_subjects; // Nodes written in the current list or null + SerdDescribeFlags flags; // Flags to control description +} DescribeContext; + +static SerdStatus +write_range_statement(const DescribeContext* ctx, + unsigned depth, + SerdStatementFlags statement_flags, + const SerdStatement* statement, + const SerdNode* last_subject, + bool write_types); + +static NodeStyle +get_node_style(const SerdModel* const model, const SerdNode* const node) +{ + if (serd_node_type(node) != SERD_BLANK) { + return NAMED; // Non-blank node can't be anonymous + } + + const size_t n_as_object = serd_model_count(model, NULL, NULL, node, NULL); + if (n_as_object > 1) { + return NAMED; // Blank node referred to several times + } + + if (serd_model_count(model, node, model->world->rdf_first, NULL, NULL) == 1 && + serd_model_count(model, node, model->world->rdf_rest, NULL, NULL) == 1 && + !serd_model_ask(model, NULL, model->world->rdf_rest, node, NULL)) { + return n_as_object == 0 ? LIST_S : LIST_O; + } + + return n_as_object == 0 ? ANON_S : ANON_O; +} + +static const SerdNode* +identity(const SerdNode* const record) +{ + return record; +} + +static ZixHashCode +ptr_hash(const SerdNode* const ptr) +{ + return zix_digest(0U, &ptr, sizeof(SerdNode*)); +} + +static bool +ptr_equals(const SerdNode* const a, const SerdNode* const b) +{ + return *(const void* const*)a == *(const void* const*)b; +} + +static SerdStatus +write_pretty_range(const DescribeContext* const ctx, + const unsigned depth, + SerdCursor* const range, + const SerdNode* last_subject, + bool write_types) +{ + SerdStatus st = SERD_SUCCESS; + const SerdStatement* statement = serd_cursor_get(range); + + while (statement) { + // Write this statement (and possibly more to describe anonymous nodes) + if ((st = write_range_statement( + ctx, depth, 0U, statement, last_subject, write_types))) { + break; + } + + // Update the last subject and advance the cursor + last_subject = serd_statement_subject(statement); + st = serd_cursor_advance(range); + statement = serd_cursor_get(range); + } + + return st > SERD_FAILURE ? st : SERD_SUCCESS; +} + +static SerdStatus +write_list(const DescribeContext* const ctx, + const unsigned depth, + SerdStatementFlags flags, + const SerdNode* node, + const SerdNode* const graph) +{ + const SerdModel* const model = ctx->model; + const SerdWorld* const world = model->world; + const SerdSink* const sink = ctx->sink; + const SerdNode* const rdf_first = world->rdf_first; + const SerdNode* const rdf_rest = world->rdf_rest; + const SerdNode* const rdf_nil = world->rdf_nil; + SerdStatus st = SERD_SUCCESS; + + const SerdStatement* fs = + serd_model_get_statement(model, node, rdf_first, NULL, graph); + + assert(fs); // Shouldn't get here if it doesn't at least have an rdf:first + + while (!st && !serd_node_equals(node, rdf_nil)) { + // Write rdf:first statement for this node + if ((st = write_range_statement(ctx, depth, flags, fs, NULL, false))) { + return st; + } + + // Get rdf:rest statement + const SerdStatement* const rs = + serd_model_get_statement(model, node, rdf_rest, NULL, graph); + + if (!rs) { + // Terminate malformed list with missing rdf:rest + return serd_sink_write(sink, 0, node, rdf_rest, rdf_nil, graph); + } + + // Terminate if the next node has no rdf:first + const SerdNode* const next = serd_statement_object(rs); + if (!(fs = serd_model_get_statement(model, next, rdf_first, NULL, graph))) { + return serd_sink_write(sink, 0, node, rdf_rest, rdf_nil, graph); + } + + // Write rdf:next statement and move to the next node + st = serd_sink_write_statement(sink, 0, rs); + node = next; + flags = 0U; + } + + return st; +} + +static bool +skip_range_statement(const SerdModel* const model, + const SerdStatement* const statement) +{ + const SerdNode* const subject = serd_statement_subject(statement); + const NodeStyle subject_style = get_node_style(model, subject); + const SerdNode* const predicate = serd_statement_predicate(statement); + + if (subject_style == ANON_O || subject_style == LIST_O) { + return true; // Skip subject that will be inlined elsewhere + } + + if (subject_style == LIST_S && + (serd_node_equals(predicate, model->world->rdf_first) || + serd_node_equals(predicate, model->world->rdf_rest))) { + return true; // Skip list statement that write_list will handle + } + + return false; +} + +static SerdStatus +write_subject_types(const DescribeContext* const ctx, + const unsigned depth, + const SerdNode* const subject, + const SerdNode* const graph) +{ + SerdCursor* const t = serd_model_find(ctx->allocator, + ctx->model, + subject, + ctx->model->world->rdf_type, + NULL, + graph); + + const SerdStatus st = + t ? write_pretty_range(ctx, depth + 1, t, subject, true) : SERD_SUCCESS; + + serd_cursor_free(ctx->allocator, t); + return st; +} + +static bool +types_first_for_subject(const DescribeContext* const ctx, const NodeStyle style) +{ + return style != LIST_S && !(ctx->flags & SERD_NO_TYPE_FIRST); +} + +static SerdStatus +write_range_statement(const DescribeContext* const ctx, + const unsigned depth, + SerdStatementFlags statement_flags, + const SerdStatement* ZIX_NONNULL statement, + const SerdNode* ZIX_NULLABLE last_subject, + const bool write_types) +{ + const SerdModel* const model = ctx->model; + const SerdSink* const sink = ctx->sink; + const SerdNode* const subject = serd_statement_subject(statement); + const NodeStyle subject_style = get_node_style(model, subject); + const SerdNode* const predicate = serd_statement_predicate(statement); + const SerdNode* const object = serd_statement_object(statement); + const NodeStyle object_style = get_node_style(model, object); + const SerdNode* const graph = serd_statement_graph(statement); + SerdStatus st = SERD_SUCCESS; + + if (depth == 0U) { + if (skip_range_statement(model, statement)) { + return SERD_SUCCESS; // Skip subject that will be inlined elsewhere + } + + if (subject_style == LIST_S) { + // First write inline list subject, which this statement will follow + if (zix_hash_insert(ctx->list_subjects, subject) != ZIX_STATUS_EXISTS) { + st = write_list(ctx, 2, statement_flags | SERD_LIST_S, subject, graph); + } + } + } + + if (st) { + return st; + } + + // If this is a new subject, write types first if necessary + const bool types_first = types_first_for_subject(ctx, subject_style); + if (subject != last_subject && types_first) { + st = write_subject_types(ctx, depth, subject, graph); + } + + // Skip type statement if it would be written another time (just above) + if (subject_style != LIST_S && !write_types && + serd_node_equals(predicate, model->world->rdf_type)) { + return st; + } + + // Set up the flags for this statement + statement_flags |= + (((subject_style == ANON_S) * (SerdStatementFlags)SERD_EMPTY_S) | + ((object_style == ANON_O) * (SerdStatementFlags)SERD_ANON_O) | + ((object_style == LIST_O) * (SerdStatementFlags)SERD_LIST_O)); + + // Finally write this statement + if ((st = serd_sink_write_statement(sink, statement_flags, statement))) { + return st; + } + + if (object_style == ANON_O) { + // Follow an anonymous object with its description like "[ ... ]" + SerdCursor* const iter = + serd_model_find(ctx->allocator, model, object, NULL, NULL, NULL); + + if (!(st = write_pretty_range(ctx, depth + 1, iter, last_subject, false))) { + st = serd_sink_write_end(sink, object); + } + + serd_cursor_free(ctx->allocator, iter); + + } else if (object_style == LIST_O) { + // Follow a list object with its description like "( ... )" + st = write_list(ctx, depth + 1, 0U, object, graph); + } + + return st; +} + +SerdStatus +serd_describe_range(SerdAllocator* const allocator, + const SerdCursor* const range, + const SerdSink* sink, + const SerdDescribeFlags flags) +{ + if (serd_cursor_is_end(range)) { + return SERD_SUCCESS; + } + + assert(sink); + + SerdCursor copy = *range; + + ZixHash* const list_subjects = + zix_hash_new((ZixAllocator*)allocator, identity, ptr_hash, ptr_equals); + + SerdStatus st = SERD_BAD_ALLOC; + if (list_subjects) { + DescribeContext ctx = {allocator, range->model, sink, list_subjects, flags}; + + st = write_pretty_range(&ctx, 0, ©, NULL, (flags & SERD_NO_TYPE_FIRST)); + } + + zix_hash_free(list_subjects); + return st; +} |