diff options
author | David Robillard <d@drobilla.net> | 2021-10-21 15:38:10 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2022-01-28 21:57:07 -0500 |
commit | b404312686874e539b617d1f27ccbaa5a82936af (patch) | |
tree | c2fdb2cc046e6da53071629cd1750dcc327e6cd9 /tools/serd-sort.c | |
parent | d4aec28ba8ad24d5aef3ee12beeb1b805148eab1 (diff) | |
download | serd-b404312686874e539b617d1f27ccbaa5a82936af.tar.gz serd-b404312686874e539b617d1f27ccbaa5a82936af.tar.bz2 serd-b404312686874e539b617d1f27ccbaa5a82936af.zip |
Replace serdi with more fine-grained tools
Especially with the new functionality, the complexity of the command-line
interface alone was really becoming unmanageable. The serdi implementation
also had the highest cyclomatic complexity of the entire codebase by a huge
margin.
So, take a page from the Unix philosophy and split serdi into several more
finely-honed tools that can be freely composed. Though there is still
unfortunately quite a bit of option overlap between them due to the common
details of reading RDF, I think the resulting tools are a lot easier to
understand, both from a user and a developer perspective.
Diffstat (limited to 'tools/serd-sort.c')
-rw-r--r-- | tools/serd-sort.c | 274 |
1 files changed, 274 insertions, 0 deletions
diff --git a/tools/serd-sort.c b/tools/serd-sort.c new file mode 100644 index 00000000..deb79cb5 --- /dev/null +++ b/tools/serd-sort.c @@ -0,0 +1,274 @@ +/* + Copyright 2011-2021 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "console.h" + +#include "serd/serd.h" + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +/* Application (after parsing command-line arguments) */ + +// All options +typedef struct { + SerdCommonOptions common; + const char* root_uri; + const char* input_string; + const char* collation; + char* const* inputs; + intptr_t n_inputs; + SerdStatementOrder order; + SerdDescribeFlags flags; +} Options; + +static bool +input_has_graphs(const Options opts) +{ + if (opts.common.input.syntax) { + return serd_syntax_has_graphs(opts.common.input.syntax); + } + + for (intptr_t i = 0u; i < opts.n_inputs; ++i) { + if (serd_syntax_has_graphs(serd_guess_syntax(opts.inputs[i]))) { + return true; + } + } + + return false; +} + +// Run the tool using the given options +static SerdStatus +run(const Options opts) +{ + SerdTool app = {NULL, NULL, NULL, NULL}; + + // Set up the writing environment + SerdStatus st = SERD_SUCCESS; + if ((st = serd_tool_setup(&app, "serd-sort", opts.common))) { + serd_tool_cleanup(app); + return st; + } + + // Determine the default order to store statements in the model + const bool with_graphs = input_has_graphs(opts); + const SerdStatementOrder default_order = opts.collation ? opts.order + : with_graphs ? SERD_ORDER_GSPO + : SERD_ORDER_SPO; + + const SerdModelFlags flags = + (SerdModelFlags)(with_graphs * SERD_STORE_GRAPHS); + + SerdModel* const model = serd_model_new(app.world, default_order, flags); + + if (!opts.collation) { + // If we are pretty-printing, we need an O** index + serd_model_add_index(model, SERD_ORDER_OPS); + + if (with_graphs) { + // If we have graphs we still need the SPO index for finding subjects + serd_model_add_index(model, SERD_ORDER_SPO); + } + } + + // Read all the inputs into an inserter to load the model + SerdSink* const inserter = serd_inserter_new(model, NULL); + if (st || (st = serd_read_inputs(app.world, + opts.common, + app.env, + opts.n_inputs, + opts.inputs, + inserter))) { + serd_tool_cleanup(app); + return st; + } + + // Write the model to the output + const SerdSink* const target = serd_writer_sink(app.writer); + if (opts.collation) { + SerdCursor* const cursor = serd_model_begin_ordered(model, opts.order); + + serd_env_write_prefixes(app.env, target); + + for (const SerdStatement* statement = NULL; + !st && (statement = serd_cursor_get(cursor)); + serd_cursor_advance(cursor)) { + st = serd_sink_write_statement(target, 0u, statement); + } + + serd_cursor_free(cursor); + } else { + SerdCursor* const cursor = serd_model_begin(model); + + serd_env_write_prefixes(app.env, target); + + st = serd_describe_range(cursor, target, opts.flags); + + serd_cursor_free(cursor); + } + + if (!st) { + st = serd_writer_finish(app.writer); + } + + const SerdStatus cst = serd_tool_cleanup(app); + return st ? st : cst; +} + +/* Command-line interface (before setting up serd) */ + +static SerdStatus +parse_statement_order(const char* const string, SerdStatementOrder* const order) +{ + static const char* const strings[] = {"SPO", + "SOP", + "OPS", + "OSP", + "PSO", + "POS", + "GSPO", + "GSOP", + "GOPS", + "GOSP", + "GPSO", + "GPOS", + NULL}; + + for (unsigned i = 0; strings[i]; ++i) { + if (!strcmp(string, strings[i])) { + *order = (SerdStatementOrder)i; + return SERD_SUCCESS; + } + } + + return SERD_ERR_BAD_ARG; +} + +static int +print_usage(const char* const name, const bool error) +{ + static const char* const description = + "Reorder RDF data by loading everything into a model then writing it.\n" + "INPUT can be a local filename, or \"-\" to read from standard input.\n\n" + " -B BASE_URI Base URI or path for resolving relative references.\n" + " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n" + " or option (lax/variables/relative/global/generated).\n" + " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n" + " or option (ascii/expanded/verbatim/terse/lax).\n" + " -V Display version information and exit.\n" + " -b BYTES I/O block size.\n" + " -c COLLATION An optional \"G\" then the letters \"SPO\" in any order.\n" + " -h Display this help and exit.\n" + " -k BYTES Parser stack size.\n" + " -o FILENAME Write output to FILENAME instead of stdout.\n"; + + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); + fprintf(os, "%s", description); + return error; +} + +// Parse the option pointed to by `iter`, and advance it to the next one +static SerdStatus +parse_option(OptionIter* const iter, Options* const opts) +{ +#define ARG_ERRORF(fmt, ...) \ + fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__) + + SerdStatus st = serd_parse_common_option(iter, &opts->common); + if (st != SERD_FAILURE) { + return st; + } + + const char opt = iter->argv[iter->a][iter->f]; + switch (opt) { + case 'R': + return serd_get_argument(iter, &opts->root_uri); + + case 'V': + return serd_print_version("serd-sort"); + + case 'c': + if (!(st = serd_get_argument(iter, &opts->collation))) { + if ((st = parse_statement_order(opts->collation, &opts->order))) { + ARG_ERRORF("unknown collation \"%s\"\n", opts->collation); + return st; + } + } + return st; + + case 'h': + print_usage(iter->argv[0], false); + return SERD_FAILURE; + + case 's': + return serd_get_argument(iter, &opts->input_string); + + default: + break; + } + + ARG_ERRORF("invalid option -- '%c'\n", opt); + return SERD_ERR_BAD_ARG; + +#undef ARG_ERRORF +} + +int +main(const int argc, char* const* const argv) +{ + Options opts = {{"", + NULL, + 4096u, + 1048576u, + {SERD_SYNTAX_EMPTY, 0u, false}, + {SERD_SYNTAX_EMPTY, 0u, false}}, + "", + NULL, + NULL, + NULL, + 0u, + SERD_ORDER_SPO, + 0u}; + + // Parse all command line options (which must precede inputs) + SerdStatus st = SERD_SUCCESS; + OptionIter iter = {argv, argc, 1, 1}; + while (!serd_option_iter_is_end(iter)) { + if ((st = parse_option(&iter, &opts))) { + return (st == SERD_FAILURE) ? 0 : print_usage(argv[0], true); + } + } + + // Every argument past the last option is an input + opts.inputs = argv + iter.a; + opts.n_inputs = argc - iter.a; + if (opts.n_inputs + (bool)opts.input_string == 0) { + fprintf(stderr, "%s: missing input\n", argv[0]); + return print_usage(argv[0], true); + } + + // Don't add prefixes to blank node labels if there is only one input + if (opts.n_inputs + (bool)opts.input_string == 1) { + opts.common.input.flags |= SERD_READ_GLOBAL; + } + + return run(opts) > SERD_FAILURE; +} |