diff options
author | David Robillard <d@drobilla.net> | 2023-03-31 17:17:41 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:08 -0500 |
commit | b5956c4dc6b065d664908104d5fc6752a87e3364 (patch) | |
tree | 6be1fa515891e759092bb9bea082e27c78bfb6de /tools | |
parent | 439d6ec3d6dfbea74334beace790f500e61c9b7d (diff) | |
download | serd-b5956c4dc6b065d664908104d5fc6752a87e3364.tar.gz serd-b5956c4dc6b065d664908104d5fc6752a87e3364.tar.bz2 serd-b5956c4dc6b065d664908104d5fc6752a87e3364.zip |
Add model and serd-sort utility
With all the new functionality, the complexity of the serd-pipe command-line
interface is starting to push the limits of available flags. So, instead of
grafting on further options to control a model, this commit adds a new tool,
serd-sort, which acts somewhat like a stripped-down serd-pipe that stores
statements in a model in memory.
This keeps the complexity (including the user-facing complexity) of any one
tool down, since other, more focused tools can be used for streaming tasks in a
pipeline.
In other words, abandon Swissarmyknifeism, take a page from the Unix
philosophy, and try to expose the model functionality to the command line in a
dedicated, focused tool. The model implementation is tested by using this tool
to run a subset of the usual test suites, and a special suite to test statement
sorting.
Diffstat (limited to 'tools')
-rw-r--r-- | tools/console.c | 57 | ||||
-rw-r--r-- | tools/console.h | 5 | ||||
-rw-r--r-- | tools/meson.build | 10 | ||||
-rw-r--r-- | tools/serd-filter.c | 12 | ||||
-rw-r--r-- | tools/serd-pipe.c | 15 | ||||
-rw-r--r-- | tools/serd-sort.c | 267 |
6 files changed, 342 insertions, 24 deletions
diff --git a/tools/console.c b/tools/console.c index 0f66a2f1..0aef9c01 100644 --- a/tools/console.c +++ b/tools/console.c @@ -31,6 +31,23 @@ #define MAX_DEPTH 128U +typedef struct LogLevelLabels { + const char* number; + const char* symbol; + const char* name; +} LogLevelLabels; + +static const LogLevelLabels log_level_strings[] = { + {"0", "emerg", "emergency"}, + {"1", "alert", NULL}, + {"2", "crit", "critical"}, + {"3", "err", "error"}, + {"4", "warn", "warning"}, + {"5", "note", "notice"}, + {"6", "info", NULL}, + {"7", "debug", NULL}, +}; + ZIX_PURE_FUNC bool serd_option_iter_is_end(const OptionIter iter) { @@ -49,6 +66,21 @@ serd_option_iter_advance(OptionIter* const iter) return SERD_SUCCESS; } +SerdCommonOptions +serd_default_options(void) +{ + const SerdCommonOptions opts = { + "", + NULL, + 4096U, + 1048576U, + {SERD_SYNTAX_EMPTY, 0U, false}, + {SERD_SYNTAX_EMPTY, 0U, false}, + SERD_LOG_LEVEL_NOTICE, + }; + return opts; +} + SerdStatus serd_tool_setup(SerdTool* const tool, const char* const program, @@ -336,6 +368,28 @@ serd_parse_output_argument(OptionIter* const iter, return st; } +static SerdStatus +serd_parse_log_level_argument(OptionIter* const iter, + SerdLogLevel* const log_level) +{ + SerdStatus st = SERD_SUCCESS; + const char* argument = NULL; + + if (!(st = serd_get_argument(iter, &argument))) { + fprintf(stderr, "LOG LEVEL: %s\n", argument); + for (unsigned i = 0U; i < (unsigned)SERD_LOG_LEVEL_DEBUG; ++i) { + const LogLevelLabels* const labels = &log_level_strings[i]; + if (!strcmp(argument, labels->number) || + !strcmp(argument, labels->symbol) || + (labels->name && !strcmp(argument, labels->name))) { + *log_level = (SerdLogLevel)i; + } + } + } + + return st; +} + SerdStatus serd_parse_common_option(OptionIter* const iter, SerdCommonOptions* const opts) { @@ -359,6 +413,9 @@ serd_parse_common_option(OptionIter* const iter, SerdCommonOptions* const opts) case 'o': return serd_get_argument(iter, &opts->out_filename); + case 'l': + return 
serd_parse_log_level_argument(iter, &opts->log_level); + default: break; } diff --git a/tools/console.h b/tools/console.h index d475aebc..c8c68411 100644 --- a/tools/console.h +++ b/tools/console.h @@ -6,6 +6,7 @@ #include "serd/env.h" #include "serd/input_stream.h" +#include "serd/log.h" #include "serd/memory.h" #include "serd/output_stream.h" #include "serd/reader.h" @@ -43,6 +44,7 @@ typedef struct { size_t stack_size; SerdSyntaxOptions input; SerdSyntaxOptions output; + SerdLogLevel log_level; } SerdCommonOptions; // Common "global" state of a command-line tool that writes data @@ -59,6 +61,9 @@ serd_option_iter_is_end(OptionIter iter); SerdStatus serd_option_iter_advance(OptionIter* iter); +SerdCommonOptions +serd_default_options(void); + SerdStatus serd_tool_setup(SerdTool* tool, const char* program, SerdCommonOptions options); diff --git a/tools/meson.build b/tools/meson.build index 43902c74..af47f217 100644 --- a/tools/meson.build +++ b/tools/meson.build @@ -26,5 +26,15 @@ serd_pipe = executable( link_args: tool_link_args, ) +serd_sort = executable( + 'serd-sort', + files('console.c', 'serd-sort.c'), + c_args: tool_c_args, + dependencies: [serd_dep, zix_dep], + install: true, + link_args: tool_link_args, +) + meson.override_find_program('serd-filter', serd_filter) meson.override_find_program('serd-pipe', serd_pipe) +meson.override_find_program('serd-sort', serd_sort) diff --git a/tools/serd-filter.c b/tools/serd-filter.c index 01834e5a..70d7b68c 100644 --- a/tools/serd-filter.c +++ b/tools/serd-filter.c @@ -274,17 +274,7 @@ main(int argc, char** argv) char default_input[] = "-"; char* default_inputs[] = {default_input}; - Options opts = {{"", - NULL, - 4096U, - 1048576U, - {SERD_SYNTAX_EMPTY, 0U, false}, - {SERD_NQUADS, 0U, false}}, - NULL, - NULL, - NULL, - 0U, - false}; + Options opts = {serd_default_options(), NULL, NULL, NULL, 0U, false}; // Parse all command line options (which must precede inputs) SerdStatus st = SERD_SUCCESS; diff --git 
a/tools/serd-pipe.c b/tools/serd-pipe.c index bbed9fa8..fb1586b4 100644 --- a/tools/serd-pipe.c +++ b/tools/serd-pipe.c @@ -108,8 +108,8 @@ print_usage(const char* const name, const bool error) " -b BYTES I/O block size.\n" " -h Display this help and exit.\n" " -k BYTES Parser stack size.\n" + " -l LEVEL Maximum log level: 0 to 7, or emerg to debug.\n" " -o FILENAME Write output to FILENAME instead of stdout.\n" - " -q Suppress warning and error output.\n" " -s STRING Parse STRING as input.\n"; FILE* const os = error ? stderr : stdout; @@ -179,18 +179,7 @@ main(const int argc, char* const* const argv) char default_input[] = {'-', '\0'}; char* default_inputs[] = {default_input}; - Options opts = {{"", - NULL, - 4096U, - 1048576U, - {SERD_SYNTAX_EMPTY, 0U, false}, - {SERD_SYNTAX_EMPTY, 0U, false}}, - "", - NULL, - NULL, - 0U, - false, - false}; + Options opts = {serd_default_options(), "", NULL, NULL, 0U, false, false}; // Parse all command line options (which must precede inputs) SerdStatus st = SERD_SUCCESS; diff --git a/tools/serd-sort.c b/tools/serd-sort.c new file mode 100644 index 00000000..3b9c829a --- /dev/null +++ b/tools/serd-sort.c @@ -0,0 +1,267 @@ +// Copyright 2011-2023 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#include "console.h" + +#include "serd/cursor.h" +#include "serd/describe.h" +#include "serd/env.h" +#include "serd/inserter.h" +#include "serd/model.h" +#include "serd/reader.h" +#include "serd/sink.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "serd/syntax.h" +#include "serd/writer.h" +#include "zix/attributes.h" + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +/* Application (after parsing command-line arguments) */ + +// All options +typedef struct { + SerdCommonOptions common; + const char* collation; + char* const* inputs; + intptr_t n_inputs; + SerdStatementOrder order; + SerdDescribeFlags flags; +} Options; + +ZIX_PURE_FUNC static bool 
+input_has_graphs(const Options opts) +{ + if (opts.common.input.syntax) { + return serd_syntax_has_graphs(opts.common.input.syntax); + } + + for (intptr_t i = 0U; i < opts.n_inputs; ++i) { + if (serd_syntax_has_graphs(serd_guess_syntax(opts.inputs[i]))) { + return true; + } + } + + return false; +} + +// Run the tool using the given options +static SerdStatus +run(const Options opts) +{ + SerdTool app = {{NULL, NULL, NULL, NULL}, NULL, NULL, NULL}; + + // Set up the writing environment + SerdStatus st = SERD_SUCCESS; + if ((st = serd_tool_setup(&app, "serd-sort", opts.common))) { + serd_tool_cleanup(app); + return st; + } + + // Determine the default order to store statements in the model + const bool with_graphs = input_has_graphs(opts); + const SerdStatementOrder default_order = opts.collation ? opts.order + : with_graphs ? SERD_ORDER_GSPO + : SERD_ORDER_SPO; + + const SerdModelFlags flags = + (SerdModelFlags)(with_graphs * SERD_STORE_GRAPHS); + + SerdModel* const model = serd_model_new(app.world, default_order, flags); + + if (!opts.collation) { + // If we are pretty-printing, we need an O** index + serd_model_add_index(model, SERD_ORDER_OPS); + + if (with_graphs) { + // If we have graphs we still need the SPO index for finding subjects + serd_model_add_index(model, SERD_ORDER_SPO); + } + } + + // Read all the inputs into an inserter to load the model + SerdSink* const inserter = serd_inserter_new(model, NULL); + if (st || (st = serd_read_inputs(app.world, + opts.common, + app.env, + opts.n_inputs, + opts.inputs, + inserter))) { + serd_sink_free(inserter); + serd_model_free(model); + serd_tool_cleanup(app); + return st; + } + + // Write the model to the output + const SerdSink* const target = serd_writer_sink(app.writer); + if (opts.collation) { + SerdCursor* const cursor = + serd_model_begin_ordered(NULL, model, opts.order); + + st = serd_env_write_prefixes(app.env, target); + + for (const SerdStatement* statement = NULL; + !st && (statement = 
serd_cursor_get(cursor)); + serd_cursor_advance(cursor)) { + st = serd_sink_write_statement(target, 0U, statement); + } + + serd_cursor_free(NULL, cursor); + } else { + SerdCursor* const cursor = serd_model_begin(NULL, model); + + if (!(st = serd_env_write_prefixes(app.env, target))) { + st = serd_describe_range(NULL, cursor, target, opts.flags); + } + + serd_cursor_free(NULL, cursor); + } + + if (!st) { + st = serd_writer_finish(app.writer); + } + + serd_sink_free(inserter); + serd_model_free(model); + + const SerdStatus cst = serd_tool_cleanup(app); + return st ? st : cst; +} + +/* Command-line interface (before setting up serd) */ + +static SerdStatus +parse_statement_order(const char* const string, SerdStatementOrder* const order) +{ + static const char* const strings[] = {"SPO", + "SOP", + "OPS", + "OSP", + "PSO", + "POS", + "GSPO", + "GSOP", + "GOPS", + "GOSP", + "GPSO", + "GPOS", + NULL}; + + for (unsigned i = 0; strings[i]; ++i) { + if (!strcmp(string, strings[i])) { + *order = (SerdStatementOrder)i; + return SERD_SUCCESS; + } + } + + return SERD_BAD_ARG; +} + +static int +print_usage(const char* const name, const bool error) +{ + static const char* const description = + "Reorder RDF data by loading everything into a model then writing it.\n" + "INPUT can be a local filename, or \"-\" to read from standard input.\n\n" + " -B BASE_URI Base URI or path for resolving relative references.\n" + " -I SYNTAX Input syntax turtle/ntriples/trig/nquads, or option\n" + " lax/variables/relative/global/generated.\n" + " -O SYNTAX Output syntax empty/turtle/ntriples/nquads, or option\n" + " ascii/expanded/verbatim/terse/lax.\n" + " -V Display version information and exit.\n" + " -b BYTES I/O block size.\n" + " -c COLLATION An optional \"G\" then the letters \"SPO\" in any order.\n" + " -h Display this help and exit.\n" + " -k BYTES Parser stack size.\n" + " -o FILENAME Write output to FILENAME instead of stdout.\n" + " -t Do not write type as \"a\" before other 
properties.\n"; + + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... [INPUT]...\n", name); + fprintf(os, "%s", description); + return error; +} + +// Parse the option pointed to by `iter`, and advance it to the next one +static SerdStatus +parse_option(OptionIter* const iter, Options* const opts) +{ +#define ARG_ERRORF(fmt, ...) \ + fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__) + + SerdStatus st = serd_parse_common_option(iter, &opts->common); + if (st != SERD_FAILURE) { + return st; + } + + const char opt = iter->argv[iter->a][iter->f]; + switch (opt) { + case 'V': + return serd_print_version("serd-sort"); + + case 'c': + if (!(st = serd_get_argument(iter, &opts->collation))) { + if ((st = parse_statement_order(opts->collation, &opts->order))) { + ARG_ERRORF("unknown collation \"%s\"\n", opts->collation); + return st; + } + } + return st; + + case 'h': + print_usage(iter->argv[0], false); + return SERD_FAILURE; + + default: + break; + } + + ARG_ERRORF("invalid option -- '%c'\n", opt); + return SERD_BAD_ARG; + +#undef ARG_ERRORF +} + +int +main(const int argc, char* const* const argv) +{ + char default_input[] = "-"; + char* default_inputs[] = {default_input}; + + Options opts = {serd_default_options(), NULL, NULL, 0U, SERD_ORDER_SPO, 0U}; + + // Parse all command line options (which must precede inputs) + SerdStatus st = SERD_SUCCESS; + OptionIter iter = {argv, argc, 1, 1}; + while (!serd_option_iter_is_end(iter)) { + if ((st = parse_option(&iter, &opts))) { + return (st == SERD_FAILURE) ? 
0 : print_usage(argv[0], true); + } + } + + // Order statements to match longhand mode if necessary + if (opts.common.output.flags & SERD_WRITE_LONGHAND) { + opts.flags |= SERD_NO_TYPE_FIRST; + } + + // Every argument past the last option is an input + opts.inputs = argv + iter.a; + opts.n_inputs = argc - iter.a; + if (opts.n_inputs == 0) { + opts.n_inputs = 1; + opts.inputs = default_inputs; + } + + // Don't add prefixes to blank node labels if there is only one input + if (opts.n_inputs == 1) { + opts.common.input.flags |= SERD_READ_GLOBAL; + } + + return run(opts) > SERD_FAILURE; +} |