aboutsummaryrefslogtreecommitdiffstats
path: root/tools/serd-sort.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/serd-sort.c')
-rw-r--r--tools/serd-sort.c267
1 files changed, 267 insertions, 0 deletions
diff --git a/tools/serd-sort.c b/tools/serd-sort.c
new file mode 100644
index 00000000..3b9c829a
--- /dev/null
+++ b/tools/serd-sort.c
@@ -0,0 +1,267 @@
+// Copyright 2011-2023 David Robillard <d@drobilla.net>
+// SPDX-License-Identifier: ISC
+
+#include "console.h"
+
+#include "serd/cursor.h"
+#include "serd/describe.h"
+#include "serd/env.h"
+#include "serd/inserter.h"
+#include "serd/model.h"
+#include "serd/reader.h"
+#include "serd/sink.h"
+#include "serd/statement.h"
+#include "serd/status.h"
+#include "serd/syntax.h"
+#include "serd/writer.h"
+#include "zix/attributes.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Application (after parsing command-line arguments) */
+
+// All options for one run of the tool, filled in by main() and parse_option()
+typedef struct {
+ // Common options, set by serd_parse_common_option(), used by serd_tool_setup()
+ SerdCommonOptions common;
+ // Argument given to -c, or NULL if -c was not given (pretty-printed output)
+ const char* collation;
+ // Input names: the arguments after the last option, or a single "-"
+ char* const* inputs;
+ // Number of elements in `inputs`
+ intptr_t n_inputs;
+ // Order parsed from `collation`, SERD_ORDER_SPO unless -c changed it
+ SerdStatementOrder order;
+ // Flags passed to serd_describe_range() when no collation is given
+ SerdDescribeFlags flags;
+} Options;
+
+// Return true if the input syntax, given or guessed, supports graphs
+ZIX_PURE_FUNC static bool
+input_has_graphs(const Options opts)
+{
+  // An explicitly given input syntax decides the answer on its own
+  if (opts.common.input.syntax) {
+    return serd_syntax_has_graphs(opts.common.input.syntax);
+  }
+
+  // Otherwise guess a syntax from each input name, stopping at the first hit
+  bool found = false;
+  for (intptr_t index = 0; !found && index < opts.n_inputs; ++index) {
+    found = serd_syntax_has_graphs(serd_guess_syntax(opts.inputs[index]));
+  }
+
+  return found;
+}
+
+/*
+  Run the tool using the given options.
+
+  Loads every input into a model, then writes the model to the output: as a
+  flat list of statements in the requested order if a collation was given,
+  otherwise as a pretty-printed description.  Returns the first error status
+  encountered, or the status of the final cleanup if everything else worked.
+*/
+static SerdStatus
+run(const Options opts)
+{
+  SerdTool app = {{NULL, NULL, NULL, NULL}, NULL, NULL, NULL};
+
+  // Set up the writing environment, bailing out early if that fails
+  SerdStatus st = serd_tool_setup(&app, "serd-sort", opts.common);
+  if (st) {
+    serd_tool_cleanup(app);
+    return st;
+  }
+
+  // Choose the default order to store statements in the model
+  const bool         with_graphs   = input_has_graphs(opts);
+  SerdStatementOrder default_order = SERD_ORDER_SPO;
+  if (opts.collation) {
+    default_order = opts.order;
+  } else if (with_graphs) {
+    default_order = SERD_ORDER_GSPO;
+  }
+
+  // Only store graphs if some input can actually contain them
+  const SerdModelFlags flags =
+    with_graphs ? (SerdModelFlags)SERD_STORE_GRAPHS : (SerdModelFlags)0;
+
+  SerdModel* const model = serd_model_new(app.world, default_order, flags);
+
+  if (!opts.collation) {
+    // Pretty-printing needs an O** index
+    serd_model_add_index(model, SERD_ORDER_OPS);
+
+    // With graphs, the SPO index is still needed for finding subjects
+    if (with_graphs) {
+      serd_model_add_index(model, SERD_ORDER_SPO);
+    }
+  }
+
+  // Load the model by reading all the inputs into an inserter
+  SerdSink* const inserter = serd_inserter_new(model, NULL);
+
+  st = serd_read_inputs(
+    app.world, opts.common, app.env, opts.n_inputs, opts.inputs, inserter);
+
+  if (!st) {
+    // Write the model to the output, prefixes first
+    const SerdSink* const sink = serd_writer_sink(app.writer);
+
+    SerdCursor* const range =
+      opts.collation ? serd_model_begin_ordered(NULL, model, opts.order)
+                     : serd_model_begin(NULL, model);
+
+    st = serd_env_write_prefixes(app.env, sink);
+
+    if (opts.collation) {
+      // Flat output: emit each statement in collation order
+      while (!st) {
+        const SerdStatement* const statement = serd_cursor_get(range);
+        if (!statement) {
+          break;
+        }
+
+        st = serd_sink_write_statement(sink, 0U, statement);
+        serd_cursor_advance(range);
+      }
+    } else if (!st) {
+      // Pretty-printed output: describe the whole range
+      st = serd_describe_range(NULL, range, sink, opts.flags);
+    }
+
+    serd_cursor_free(NULL, range);
+
+    if (!st) {
+      st = serd_writer_finish(app.writer);
+    }
+  }
+
+  // Tear everything down, reporting a cleanup error only if nothing else failed
+  serd_sink_free(inserter);
+  serd_model_free(model);
+
+  const SerdStatus cleanup_st = serd_tool_cleanup(app);
+  return st ? st : cleanup_st;
+}
+
+/* Command-line interface (before setting up serd) */
+
+/*
+  Parse a collation name like "SPO" or "GOPS" into a statement order.
+
+  Sets `order` and returns SERD_SUCCESS if `string` exactly matches a known
+  name, otherwise leaves `order` untouched and returns SERD_BAD_ARG.
+*/
+static SerdStatus
+parse_statement_order(const char* const string, SerdStatementOrder* const order)
+{
+  // The index of a name in this table is used as its SerdStatementOrder value
+  static const char* const names[] = {"SPO",
+                                      "SOP",
+                                      "OPS",
+                                      "OSP",
+                                      "PSO",
+                                      "POS",
+                                      "GSPO",
+                                      "GSOP",
+                                      "GOPS",
+                                      "GOSP",
+                                      "GPSO",
+                                      "GPOS"};
+
+  const unsigned n_names = (unsigned)(sizeof(names) / sizeof(names[0]));
+
+  for (unsigned n = 0U; n < n_names; ++n) {
+    if (strcmp(string, names[n]) == 0) {
+      *order = (SerdStatementOrder)n;
+      return SERD_SUCCESS;
+    }
+  }
+
+  return SERD_BAD_ARG;
+}
+
+/*
+  Print the usage message for the program called `name`.
+
+  The message goes to stderr, preceded by a blank line, if `error` is true,
+  and to stdout otherwise.  Returns 1 if `error` is true, otherwise 0.
+*/
+static int
+print_usage(const char* const name, const bool error)
+{
+  static const char* const help_text =
+    "Reorder RDF data by loading everything into a model then writing it.\n"
+    "INPUT can be a local filename, or \"-\" to read from standard input.\n\n"
+    " -B BASE_URI Base URI or path for resolving relative references.\n"
+    " -I SYNTAX Input syntax turtle/ntriples/trig/nquads, or option\n"
+    " lax/variables/relative/global/generated.\n"
+    " -O SYNTAX Output syntax empty/turtle/ntriples/nquads, or option\n"
+    " ascii/expanded/verbatim/terse/lax.\n"
+    " -V Display version information and exit.\n"
+    " -b BYTES I/O block size.\n"
+    " -c COLLATION An optional \"G\" then the letters \"SPO\" in any order.\n"
+    " -h Display this help and exit.\n"
+    " -k BYTES Parser stack size.\n"
+    " -o FILENAME Write output to FILENAME instead of stdout.\n"
+    " -t Do not write type as \"a\" before other properties.\n";
+
+  FILE* out = stdout;
+  if (error) {
+    // Separate the usage text from the error message printed before it
+    out = stderr;
+    fputc('\n', out);
+  }
+
+  fprintf(out, "Usage: %s [OPTION]... [INPUT]...\n", name);
+  fputs(help_text, out);
+
+  return error ? 1 : 0;
+}
+
+/*
+  Parse the option pointed to by `iter`, and advance it to the next one.
+
+  Common options are tried first, then the options specific to this tool.
+  Returns SERD_SUCCESS if parsing should continue, SERD_FAILURE after printing
+  the help text (and whenever serd_print_version() returns it), or an error
+  status after printing a message to stderr.
+*/
+static SerdStatus
+parse_option(OptionIter* const iter, Options* const opts)
+{
+  // Any result other than SERD_FAILURE from the common parser is final
+  // (SERD_FAILURE presumably means "not a common option" -- confirm in console.h)
+  SerdStatus st = serd_parse_common_option(iter, &opts->common);
+  if (st != SERD_FAILURE) {
+    return st;
+  }
+
+  const char*const program = iter->argv[0];
+  const char      flag    = iter->argv[iter->a][iter->f];
+
+  if (flag == 'V') {
+    return serd_print_version("serd-sort");
+  }
+
+  if (flag == 'h') {
+    print_usage(program, false);
+    return SERD_FAILURE;
+  }
+
+  if (flag == 'c') {
+    // Fetch the collation argument, then translate it to a statement order
+    st = serd_get_argument(iter, &opts->collation);
+    if (st) {
+      return st;
+    }
+
+    st = parse_statement_order(opts->collation, &opts->order);
+    if (st) {
+      fprintf(
+        stderr, "%s: unknown collation \"%s\"\n", program, opts->collation);
+    }
+
+    return st;
+  }
+
+  fprintf(stderr, "%s: invalid option -- '%c'\n", program, flag);
+  return SERD_BAD_ARG;
+}
+
+/*
+  Program entry point: parse the command line, then run the tool.
+
+  Returns 0 on success (including after printing help), and 1 on errors.
+*/
+int
+main(const int argc, char* const* const argv)
+{
+  // Fallback input list used when no inputs are given: standard input
+  char  stdin_name[]     = "-";
+  char* default_inputs[] = {stdin_name};
+
+  Options opts = {.common    = serd_default_options(),
+                  .collation = NULL,
+                  .inputs    = NULL,
+                  .n_inputs  = 0,
+                  .order     = SERD_ORDER_SPO,
+                  .flags     = 0U};
+
+  // Parse all command line options (which must precede inputs)
+  OptionIter iter = {argv, argc, 1, 1};
+  while (!serd_option_iter_is_end(iter)) {
+    const SerdStatus st = parse_option(&iter, &opts);
+    if (st == SERD_FAILURE) {
+      return 0; // Stopped deliberately without an error (like after -h)
+    }
+
+    if (st) {
+      return print_usage(argv[0], true);
+    }
+  }
+
+  // Order statements to match longhand mode if necessary
+  if (opts.common.output.flags & SERD_WRITE_LONGHAND) {
+    opts.flags |= SERD_NO_TYPE_FIRST;
+  }
+
+  // Every argument past the last option is an input
+  const intptr_t n_remaining = argc - iter.a;
+  if (n_remaining == 0) {
+    opts.inputs   = default_inputs;
+    opts.n_inputs = 1;
+  } else {
+    opts.inputs   = argv + iter.a;
+    opts.n_inputs = n_remaining;
+  }
+
+  // With a single input, blank node labels don't need prefixes added
+  if (opts.n_inputs == 1) {
+    opts.common.input.flags |= SERD_READ_GLOBAL;
+  }
+
+  // Anything worse than SERD_FAILURE is reported as a failing exit status
+  const SerdStatus run_st = run(opts);
+  return (run_st > SERD_FAILURE) ? 1 : 0;
+}