aboutsummaryrefslogtreecommitdiffstats
path: root/tools/serd-sort.c
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2021-10-21 15:38:10 -0400
committerDavid Robillard <d@drobilla.net>2022-01-28 21:57:07 -0500
commitb404312686874e539b617d1f27ccbaa5a82936af (patch)
treec2fdb2cc046e6da53071629cd1750dcc327e6cd9 /tools/serd-sort.c
parentd4aec28ba8ad24d5aef3ee12beeb1b805148eab1 (diff)
downloadserd-b404312686874e539b617d1f27ccbaa5a82936af.tar.gz
serd-b404312686874e539b617d1f27ccbaa5a82936af.tar.bz2
serd-b404312686874e539b617d1f27ccbaa5a82936af.zip
Replace serdi with more fine-grained tools
Especially with the new functionality, the complexity of the command-line interface alone was really becoming unmanageable. The serdi implementation also had the highest cyclomatic complexity of the entire codebase by a huge margin. So, take a page from the Unix philosophy and split serdi into several more finely-honed tools that can be freely composed. Though there is still unfortunately quite a bit of option overlap between them due to the common details of reading RDF, I think the resulting tools are a lot easier to understand, both from a user and a developer perspective.
Diffstat (limited to 'tools/serd-sort.c')
-rw-r--r--tools/serd-sort.c274
1 files changed, 274 insertions, 0 deletions
diff --git a/tools/serd-sort.c b/tools/serd-sort.c
new file mode 100644
index 00000000..deb79cb5
--- /dev/null
+++ b/tools/serd-sort.c
@@ -0,0 +1,274 @@
+/*
+ Copyright 2011-2021 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "console.h"
+
+#include "serd/serd.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Application (after parsing command-line arguments) */
+
+// All options
+typedef struct {
+ SerdCommonOptions common;
+ const char* root_uri;
+ const char* input_string;
+ const char* collation;
+ char* const* inputs;
+ intptr_t n_inputs;
+ SerdStatementOrder order;
+ SerdDescribeFlags flags;
+} Options;
+
+static bool
+input_has_graphs(const Options opts)
+{
+ if (opts.common.input.syntax) {
+ return serd_syntax_has_graphs(opts.common.input.syntax);
+ }
+
+ for (intptr_t i = 0u; i < opts.n_inputs; ++i) {
+ if (serd_syntax_has_graphs(serd_guess_syntax(opts.inputs[i]))) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Run the tool using the given options
+static SerdStatus
+run(const Options opts)
+{
+ SerdTool app = {NULL, NULL, NULL, NULL};
+
+ // Set up the writing environment
+ SerdStatus st = SERD_SUCCESS;
+ if ((st = serd_tool_setup(&app, "serd-sort", opts.common))) {
+ serd_tool_cleanup(app);
+ return st;
+ }
+
+ // Determine the default order to store statements in the model
+ const bool with_graphs = input_has_graphs(opts);
+ const SerdStatementOrder default_order = opts.collation ? opts.order
+ : with_graphs ? SERD_ORDER_GSPO
+ : SERD_ORDER_SPO;
+
+ const SerdModelFlags flags =
+ (SerdModelFlags)(with_graphs * SERD_STORE_GRAPHS);
+
+ SerdModel* const model = serd_model_new(app.world, default_order, flags);
+
+ if (!opts.collation) {
+ // If we are pretty-printing, we need an O** index
+ serd_model_add_index(model, SERD_ORDER_OPS);
+
+ if (with_graphs) {
+ // If we have graphs we still need the SPO index for finding subjects
+ serd_model_add_index(model, SERD_ORDER_SPO);
+ }
+ }
+
+ // Read all the inputs into an inserter to load the model
+ SerdSink* const inserter = serd_inserter_new(model, NULL);
+ if (st || (st = serd_read_inputs(app.world,
+ opts.common,
+ app.env,
+ opts.n_inputs,
+ opts.inputs,
+ inserter))) {
+ serd_tool_cleanup(app);
+ return st;
+ }
+
+ // Write the model to the output
+ const SerdSink* const target = serd_writer_sink(app.writer);
+ if (opts.collation) {
+ SerdCursor* const cursor = serd_model_begin_ordered(model, opts.order);
+
+ serd_env_write_prefixes(app.env, target);
+
+ for (const SerdStatement* statement = NULL;
+ !st && (statement = serd_cursor_get(cursor));
+ serd_cursor_advance(cursor)) {
+ st = serd_sink_write_statement(target, 0u, statement);
+ }
+
+ serd_cursor_free(cursor);
+ } else {
+ SerdCursor* const cursor = serd_model_begin(model);
+
+ serd_env_write_prefixes(app.env, target);
+
+ st = serd_describe_range(cursor, target, opts.flags);
+
+ serd_cursor_free(cursor);
+ }
+
+ if (!st) {
+ st = serd_writer_finish(app.writer);
+ }
+
+ const SerdStatus cst = serd_tool_cleanup(app);
+ return st ? st : cst;
+}
+
+/* Command-line interface (before setting up serd) */
+
+static SerdStatus
+parse_statement_order(const char* const string, SerdStatementOrder* const order)
+{
+ static const char* const strings[] = {"SPO",
+ "SOP",
+ "OPS",
+ "OSP",
+ "PSO",
+ "POS",
+ "GSPO",
+ "GSOP",
+ "GOPS",
+ "GOSP",
+ "GPSO",
+ "GPOS",
+ NULL};
+
+ for (unsigned i = 0; strings[i]; ++i) {
+ if (!strcmp(string, strings[i])) {
+ *order = (SerdStatementOrder)i;
+ return SERD_SUCCESS;
+ }
+ }
+
+ return SERD_ERR_BAD_ARG;
+}
+
+static int
+print_usage(const char* const name, const bool error)
+{
+ static const char* const description =
+ "Reorder RDF data by loading everything into a model then writing it.\n"
+ "INPUT can be a local filename, or \"-\" to read from standard input.\n\n"
+ " -B BASE_URI Base URI or path for resolving relative references.\n"
+ " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n"
+ " or option (lax/variables/relative/global/generated).\n"
+ " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n"
+ " or option (ascii/expanded/verbatim/terse/lax).\n"
+ " -V Display version information and exit.\n"
+ " -b BYTES I/O block size.\n"
+ " -c COLLATION An optional \"G\" then the letters \"SPO\" in any order.\n"
+ " -h Display this help and exit.\n"
+ " -k BYTES Parser stack size.\n"
+ " -o FILENAME Write output to FILENAME instead of stdout.\n";
+
+ FILE* const os = error ? stderr : stdout;
+ fprintf(os, "%s", error ? "\n" : "");
+ fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
+ fprintf(os, "%s", description);
+ return error;
+}
+
+// Parse the option pointed to by `iter`, and advance it to the next one
+static SerdStatus
+parse_option(OptionIter* const iter, Options* const opts)
+{
+#define ARG_ERRORF(fmt, ...) \
+ fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__)
+
+ SerdStatus st = serd_parse_common_option(iter, &opts->common);
+ if (st != SERD_FAILURE) {
+ return st;
+ }
+
+ const char opt = iter->argv[iter->a][iter->f];
+ switch (opt) {
+ case 'R':
+ return serd_get_argument(iter, &opts->root_uri);
+
+ case 'V':
+ return serd_print_version("serd-sort");
+
+ case 'c':
+ if (!(st = serd_get_argument(iter, &opts->collation))) {
+ if ((st = parse_statement_order(opts->collation, &opts->order))) {
+ ARG_ERRORF("unknown collation \"%s\"\n", opts->collation);
+ return st;
+ }
+ }
+ return st;
+
+ case 'h':
+ print_usage(iter->argv[0], false);
+ return SERD_FAILURE;
+
+ case 's':
+ return serd_get_argument(iter, &opts->input_string);
+
+ default:
+ break;
+ }
+
+ ARG_ERRORF("invalid option -- '%c'\n", opt);
+ return SERD_ERR_BAD_ARG;
+
+#undef ARG_ERRORF
+}
+
+int
+main(const int argc, char* const* const argv)
+{
+ Options opts = {{"",
+ NULL,
+ 4096u,
+ 1048576u,
+ {SERD_SYNTAX_EMPTY, 0u, false},
+ {SERD_SYNTAX_EMPTY, 0u, false}},
+ "",
+ NULL,
+ NULL,
+ NULL,
+ 0u,
+ SERD_ORDER_SPO,
+ 0u};
+
+ // Parse all command line options (which must precede inputs)
+ SerdStatus st = SERD_SUCCESS;
+ OptionIter iter = {argv, argc, 1, 1};
+ while (!serd_option_iter_is_end(iter)) {
+ if ((st = parse_option(&iter, &opts))) {
+ return (st == SERD_FAILURE) ? 0 : print_usage(argv[0], true);
+ }
+ }
+
+ // Every argument past the last option is an input
+ opts.inputs = argv + iter.a;
+ opts.n_inputs = argc - iter.a;
+ if (opts.n_inputs + (bool)opts.input_string == 0) {
+ fprintf(stderr, "%s: missing input\n", argv[0]);
+ return print_usage(argv[0], true);
+ }
+
+ // Don't add prefixes to blank node labels if there is only one input
+ if (opts.n_inputs + (bool)opts.input_string == 1) {
+ opts.common.input.flags |= SERD_READ_GLOBAL;
+ }
+
+ return run(opts) > SERD_FAILURE;
+}