aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
author: David Robillard <d@drobilla.net> 2023-03-31 17:17:41 -0400
committer: David Robillard <d@drobilla.net> 2023-12-02 18:49:08 -0500
commit: b5956c4dc6b065d664908104d5fc6752a87e3364 (patch)
tree: 6be1fa515891e759092bb9bea082e27c78bfb6de /tools
parent: 439d6ec3d6dfbea74334beace790f500e61c9b7d (diff)
download: serd-b5956c4dc6b065d664908104d5fc6752a87e3364.tar.gz
serd-b5956c4dc6b065d664908104d5fc6752a87e3364.tar.bz2
serd-b5956c4dc6b065d664908104d5fc6752a87e3364.zip
Add model and serd-sort utility
With all the new functionality, the complexity of the serd-pipe command-line interface is starting to push the limits of available flags. So, instead of grafting on further options to control a model, this commit adds a new tool, serd-sort, which acts somewhat like a stripped-down serd-pipe that stores statements in a model in memory. This keeps the complexity (including the user-facing complexity) of any one tool down, since other more focused tools can be used for streaming tasks in a pipeline. In other words, abandon Swissarmyknifeism, take a page from the Unix philosophy, and try to expose the model functionality to the command-line in a dedicated focused tool. The model implementation is tested by using this tool to run a subset of the usual test suites, and a special suite to test statement sorting.
Diffstat (limited to 'tools')
-rw-r--r-- tools/console.c 57
-rw-r--r-- tools/console.h 5
-rw-r--r-- tools/meson.build 10
-rw-r--r-- tools/serd-filter.c 12
-rw-r--r-- tools/serd-pipe.c 15
-rw-r--r-- tools/serd-sort.c 267
6 files changed, 342 insertions, 24 deletions
diff --git a/tools/console.c b/tools/console.c
index 0f66a2f1..0aef9c01 100644
--- a/tools/console.c
+++ b/tools/console.c
@@ -31,6 +31,23 @@
#define MAX_DEPTH 128U
+// The strings that may be given as an argument to select one log level
+typedef struct LogLevelLabels {
+ // Level number written as a decimal string, like "3"
+ const char* number;
+ // Short label, like "err"
+ const char* symbol;
+ // Long label, like "error", or NULL if the level has none
+ const char* name;
+} LogLevelLabels;
+
+// Labels for each log level, where the array index is the SerdLogLevel value
+// (the parser casts the index of the matching entry to SerdLogLevel).
+// NOTE(review): the long name is NULL where the symbol already looks like the
+// full word ("alert", "info", "debug") -- presumably intentional, confirm.
+static const LogLevelLabels log_level_strings[] = {
+ {"0", "emerg", "emergency"},
+ {"1", "alert", NULL},
+ {"2", "crit", "critical"},
+ {"3", "err", "error"},
+ {"4", "warn", "warning"},
+ {"5", "note", "notice"},
+ {"6", "info", NULL},
+ {"7", "debug", NULL},
+};
+
ZIX_PURE_FUNC bool
serd_option_iter_is_end(const OptionIter iter)
{
@@ -49,6 +66,21 @@ serd_option_iter_advance(OptionIter* const iter)
return SERD_SUCCESS;
}
+// Return the default values of the options shared by the command-line tools.
+// The initializer is positional, so its order must match the field order of
+// SerdCommonOptions.
+SerdCommonOptions
+serd_default_options(void)
+{
+ const SerdCommonOptions opts = {
+ // NOTE(review): the next two fields are declared outside this view;
+ // presumably the base URI and output filename -- confirm against console.h
+ "",
+ NULL,
+ // NOTE(review): presumably the I/O block size in bytes -- confirm
+ 4096U,
+ // stack_size
+ 1048576U,
+ // input syntax options
+ {SERD_SYNTAX_EMPTY, 0U, false},
+ // output syntax options
+ {SERD_SYNTAX_EMPTY, 0U, false},
+ // log_level
+ SERD_LOG_LEVEL_NOTICE,
+ };
+ return opts;
+}
+
SerdStatus
serd_tool_setup(SerdTool* const tool,
const char* const program,
@@ -336,6 +368,28 @@ serd_parse_output_argument(OptionIter* const iter,
return st;
}
+// Parse the argument of a log level option and store the result in `log_level`.
+//
+// The argument may be the number, symbol, or name of any entry in
+// `log_level_strings`.  Returns the error from serd_get_argument() if the
+// argument can't be read, or SERD_BAD_ARG (leaving `log_level` untouched) if
+// the argument doesn't match any known level.
+static SerdStatus
+serd_parse_log_level_argument(OptionIter* const   iter,
+                              SerdLogLevel* const log_level)
+{
+  const char*      argument = NULL;
+  const SerdStatus st       = serd_get_argument(iter, &argument);
+  if (st) {
+    return st;
+  }
+
+  // Bound the search by the table itself so that the last level is included
+  const size_t n_levels =
+    sizeof(log_level_strings) / sizeof(log_level_strings[0]);
+
+  for (size_t i = 0U; i < n_levels; ++i) {
+    const LogLevelLabels* const labels = &log_level_strings[i];
+    if (!strcmp(argument, labels->number) ||
+        !strcmp(argument, labels->symbol) ||
+        (labels->name && !strcmp(argument, labels->name))) {
+      *log_level = (SerdLogLevel)i;
+      return SERD_SUCCESS;
+    }
+  }
+
+  // Report unknown levels instead of silently keeping the previous one
+  fprintf(stderr, "%s: unknown log level \"%s\"\n", iter->argv[0], argument);
+  return SERD_BAD_ARG;
+}
+
SerdStatus
serd_parse_common_option(OptionIter* const iter, SerdCommonOptions* const opts)
{
@@ -359,6 +413,9 @@ serd_parse_common_option(OptionIter* const iter, SerdCommonOptions* const opts)
case 'o':
return serd_get_argument(iter, &opts->out_filename);
+ case 'l':
+ return serd_parse_log_level_argument(iter, &opts->log_level);
+
default:
break;
}
diff --git a/tools/console.h b/tools/console.h
index d475aebc..c8c68411 100644
--- a/tools/console.h
+++ b/tools/console.h
@@ -6,6 +6,7 @@
#include "serd/env.h"
#include "serd/input_stream.h"
+#include "serd/log.h"
#include "serd/memory.h"
#include "serd/output_stream.h"
#include "serd/reader.h"
@@ -43,6 +44,7 @@ typedef struct {
size_t stack_size;
SerdSyntaxOptions input;
SerdSyntaxOptions output;
+ SerdLogLevel log_level;
} SerdCommonOptions;
// Common "global" state of a command-line tool that writes data
@@ -59,6 +61,9 @@ serd_option_iter_is_end(OptionIter iter);
SerdStatus
serd_option_iter_advance(OptionIter* iter);
+// Return the default values for the common options of a tool
+SerdCommonOptions
+serd_default_options(void);
+
SerdStatus
serd_tool_setup(SerdTool* tool, const char* program, SerdCommonOptions options);
diff --git a/tools/meson.build b/tools/meson.build
index 43902c74..af47f217 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -26,5 +26,15 @@ serd_pipe = executable(
link_args: tool_link_args,
)
+# Installed command-line tool built from the shared console code and serd-sort.c
+serd_sort = executable(
+ 'serd-sort',
+ files('console.c', 'serd-sort.c'),
+ c_args: tool_c_args,
+ dependencies: [serd_dep, zix_dep],
+ install: true,
+ link_args: tool_link_args,
+)
+
meson.override_find_program('serd-filter', serd_filter)
meson.override_find_program('serd-pipe', serd_pipe)
+meson.override_find_program('serd-sort', serd_sort)
diff --git a/tools/serd-filter.c b/tools/serd-filter.c
index 01834e5a..70d7b68c 100644
--- a/tools/serd-filter.c
+++ b/tools/serd-filter.c
@@ -274,17 +274,7 @@ main(int argc, char** argv)
char default_input[] = "-";
char* default_inputs[] = {default_input};
- Options opts = {{"",
- NULL,
- 4096U,
- 1048576U,
- {SERD_SYNTAX_EMPTY, 0U, false},
- {SERD_NQUADS, 0U, false}},
- NULL,
- NULL,
- NULL,
- 0U,
- false};
+ Options opts = {serd_default_options(), NULL, NULL, NULL, 0U, false};
// Parse all command line options (which must precede inputs)
SerdStatus st = SERD_SUCCESS;
diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c
index bbed9fa8..fb1586b4 100644
--- a/tools/serd-pipe.c
+++ b/tools/serd-pipe.c
@@ -108,8 +108,8 @@ print_usage(const char* const name, const bool error)
" -b BYTES I/O block size.\n"
" -h Display this help and exit.\n"
" -k BYTES Parser stack size.\n"
+ " -l LEVEL Maximum log level: 0 to 7, or emerg to debug.\n"
" -o FILENAME Write output to FILENAME instead of stdout.\n"
- " -q Suppress warning and error output.\n"
" -s STRING Parse STRING as input.\n";
FILE* const os = error ? stderr : stdout;
@@ -179,18 +179,7 @@ main(const int argc, char* const* const argv)
char default_input[] = {'-', '\0'};
char* default_inputs[] = {default_input};
- Options opts = {{"",
- NULL,
- 4096U,
- 1048576U,
- {SERD_SYNTAX_EMPTY, 0U, false},
- {SERD_SYNTAX_EMPTY, 0U, false}},
- "",
- NULL,
- NULL,
- 0U,
- false,
- false};
+ Options opts = {serd_default_options(), "", NULL, NULL, 0U, false, false};
// Parse all command line options (which must precede inputs)
SerdStatus st = SERD_SUCCESS;
diff --git a/tools/serd-sort.c b/tools/serd-sort.c
new file mode 100644
index 00000000..3b9c829a
--- /dev/null
+++ b/tools/serd-sort.c
@@ -0,0 +1,267 @@
+// Copyright 2011-2023 David Robillard <d@drobilla.net>
+// SPDX-License-Identifier: ISC
+
+#include "console.h"
+
+#include "serd/cursor.h"
+#include "serd/describe.h"
+#include "serd/env.h"
+#include "serd/inserter.h"
+#include "serd/model.h"
+#include "serd/reader.h"
+#include "serd/sink.h"
+#include "serd/statement.h"
+#include "serd/status.h"
+#include "serd/syntax.h"
+#include "serd/writer.h"
+#include "zix/attributes.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Application (after parsing command-line arguments) */
+
+// All options
+typedef struct {
+ // Options shared with the other tools
+ SerdCommonOptions common;
+ // Argument given to -c, or NULL if no collation was requested
+ const char* collation;
+ // Input arguments (or a single "-" if none were given)
+ char* const* inputs;
+ // Number of elements in `inputs`
+ intptr_t n_inputs;
+ // Statement order parsed from `collation`
+ SerdStatementOrder order;
+ // Flags passed to serd_describe_range() when no collation is given
+ SerdDescribeFlags flags;
+} Options;
+
+// Return true if the input syntax, given or guessed, supports graphs
+ZIX_PURE_FUNC static bool
+input_has_graphs(const Options opts)
+{
+  // An explicitly set input syntax decides the answer by itself
+  if (opts.common.input.syntax) {
+    return serd_syntax_has_graphs(opts.common.input.syntax);
+  }
+
+  // Otherwise, guess a syntax for each input argument until one has graphs
+  bool found = false;
+  for (intptr_t index = 0; !found && index < opts.n_inputs; ++index) {
+    found = serd_syntax_has_graphs(serd_guess_syntax(opts.inputs[index]));
+  }
+
+  return found;
+}
+
+// Write the prefixes of the environment, then the contents of `model`, to the
+// writer of `app`, and finish the writer if everything succeeded
+static SerdStatus
+write_model(const SerdTool app, SerdModel* const model, const Options opts)
+{
+  const SerdSink* const sink = serd_writer_sink(app.writer);
+  SerdStatus            st   = SERD_SUCCESS;
+
+  if (!opts.collation) {
+    // No collation was requested, so describe the whole model
+    SerdCursor* const everything = serd_model_begin(NULL, model);
+
+    st = serd_env_write_prefixes(app.env, sink);
+    if (!st) {
+      st = serd_describe_range(NULL, everything, sink, opts.flags);
+    }
+
+    serd_cursor_free(NULL, everything);
+  } else {
+    // Write statements one at a time in the requested order
+    SerdCursor* const pos = serd_model_begin_ordered(NULL, model, opts.order);
+
+    st = serd_env_write_prefixes(app.env, sink);
+    while (!st) {
+      const SerdStatement* const statement = serd_cursor_get(pos);
+      if (!statement) {
+        break;
+      }
+
+      st = serd_sink_write_statement(sink, 0U, statement);
+      serd_cursor_advance(pos);
+    }
+
+    serd_cursor_free(NULL, pos);
+  }
+
+  return st ? st : serd_writer_finish(app.writer);
+}
+
+// Run the tool using the given options
+static SerdStatus
+run(const Options opts)
+{
+  SerdTool app = {{NULL, NULL, NULL, NULL}, NULL, NULL, NULL};
+
+  // Set up the writing environment
+  SerdStatus st = serd_tool_setup(&app, "serd-sort", opts.common);
+  if (st) {
+    serd_tool_cleanup(app);
+    return st;
+  }
+
+  // Determine the default order to store statements in the model
+  const bool         with_graphs = input_has_graphs(opts);
+  SerdStatementOrder store_order = SERD_ORDER_SPO;
+  if (opts.collation) {
+    store_order = opts.order;
+  } else if (with_graphs) {
+    store_order = SERD_ORDER_GSPO;
+  }
+
+  const SerdModelFlags model_flags =
+    with_graphs ? (SerdModelFlags)SERD_STORE_GRAPHS : (SerdModelFlags)0;
+
+  SerdModel* const model = serd_model_new(app.world, store_order, model_flags);
+
+  if (!opts.collation) {
+    // If we are pretty-printing, we need an O** index
+    serd_model_add_index(model, SERD_ORDER_OPS);
+
+    // If we have graphs we still need the SPO index for finding subjects
+    if (with_graphs) {
+      serd_model_add_index(model, SERD_ORDER_SPO);
+    }
+  }
+
+  // Read all the inputs into an inserter to load the model, then write it
+  SerdSink* const inserter = serd_inserter_new(model, NULL);
+
+  st = serd_read_inputs(
+    app.world, opts.common, app.env, opts.n_inputs, opts.inputs, inserter);
+  if (!st) {
+    st = write_model(app, model, opts);
+  }
+
+  // Clean up, preferring to report an earlier error over a cleanup error
+  serd_sink_free(inserter);
+  serd_model_free(model);
+
+  const SerdStatus cleanup_st = serd_tool_cleanup(app);
+  return st ? st : cleanup_st;
+}
+
+/* Command-line interface (before setting up serd) */
+
+// Parse a collation string like "GSPO" into a statement order.
+//
+// Returns SERD_BAD_ARG, without touching `order`, if the string is unknown.
+static SerdStatus
+parse_statement_order(const char* const string, SerdStatementOrder* const order)
+{
+  // The position of each name is cast directly to a SerdStatementOrder
+  static const char* const names[] = {"SPO",
+                                      "SOP",
+                                      "OPS",
+                                      "OSP",
+                                      "PSO",
+                                      "POS",
+                                      "GSPO",
+                                      "GSOP",
+                                      "GOPS",
+                                      "GOSP",
+                                      "GPSO",
+                                      "GPOS"};
+
+  const unsigned n_names = (unsigned)(sizeof(names) / sizeof(names[0]));
+
+  unsigned index = 0U;
+  while (index < n_names && strcmp(string, names[index])) {
+    ++index;
+  }
+
+  if (index == n_names) {
+    return SERD_BAD_ARG;
+  }
+
+  *order = (SerdStatementOrder)index;
+  return SERD_SUCCESS;
+}
+
+// Print usage help for the program invoked as `name`.
+//
+// If `error` is true, the help is written to stderr after a blank line,
+// otherwise it is written to stdout.  Returns 1 if `error` is true and 0
+// otherwise, which main() uses as the exit status.
+static int
+print_usage(const char* const name, const bool error)
+{
+  static const char* const description =
+    "Reorder RDF data by loading everything into a model then writing it.\n"
+    "INPUT can be a local filename, or \"-\" to read from standard input.\n\n"
+    " -B BASE_URI Base URI or path for resolving relative references.\n"
+    " -I SYNTAX Input syntax turtle/ntriples/trig/nquads, or option\n"
+    " lax/variables/relative/global/generated.\n"
+    " -O SYNTAX Output syntax empty/turtle/ntriples/nquads, or option\n"
+    " ascii/expanded/verbatim/terse/lax.\n"
+    " -V Display version information and exit.\n"
+    " -b BYTES I/O block size.\n"
+    " -c COLLATION An optional \"G\" then the letters \"SPO\" in any order.\n"
+    " -h Display this help and exit.\n"
+    " -k BYTES Parser stack size.\n"
+    // The common option parser accepts -l, so document it here as well
+    " -l LEVEL Maximum log level: 0 to 7, or emerg to debug.\n"
+    " -o FILENAME Write output to FILENAME instead of stdout.\n"
+    " -t Do not write type as \"a\" before other properties.\n";
+
+  FILE* const os = error ? stderr : stdout;
+  fprintf(os, "%s", error ? "\n" : "");
+  fprintf(os, "Usage: %s [OPTION]... [INPUT]...\n", name);
+  fprintf(os, "%s", description);
+  return error;
+}
+
+// Parse the option pointed to by `iter`, and advance it to the next one.
+//
+// Common options are tried first; the tool-specific ones below are only
+// considered when serd_parse_common_option() returns SERD_FAILURE.  Returns
+// SERD_SUCCESS if an option was consumed, SERD_FAILURE after printing help
+// for -h (which main() treats as a request to exit successfully), or an error
+// status for an invalid option or argument.
+static SerdStatus
+parse_option(OptionIter* const iter, Options* const opts)
+{
+#define ARG_ERRORF(fmt, ...) \
+  fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__)
+
+  SerdStatus st = serd_parse_common_option(iter, &opts->common);
+  if (st != SERD_FAILURE) {
+    return st;
+  }
+
+  const char opt = iter->argv[iter->a][iter->f];
+  switch (opt) {
+  case 'V':
+    return serd_print_version("serd-sort");
+
+  case 'c':
+    if (!(st = serd_get_argument(iter, &opts->collation))) {
+      if ((st = parse_statement_order(opts->collation, &opts->order))) {
+        ARG_ERRORF("unknown collation \"%s\"\n", opts->collation);
+        return st;
+      }
+    }
+    return st;
+
+  case 'h':
+    print_usage(iter->argv[0], false);
+    return SERD_FAILURE;
+
+  case 't':
+    // Documented in the usage text: don't write type first as "a"
+    opts->flags |= SERD_NO_TYPE_FIRST;
+    return serd_option_iter_advance(iter);
+
+  default:
+    break;
+  }
+
+  ARG_ERRORF("invalid option -- '%c'\n", opt);
+  return SERD_BAD_ARG;
+
+#undef ARG_ERRORF
+}
+
+// Parse the command line, then load and write the inputs.
+//
+// Exits with status 0 on success or after printing help, and 1 on error.
+int
+main(const int argc, char* const* const argv)
+{
+  char  stdin_name[]    = "-";
+  char* stdin_inputs[]  = {stdin_name};
+
+  Options opts = {serd_default_options(), NULL, NULL, 0U, SERD_ORDER_SPO, 0U};
+
+  // Parse all command line options (which must precede inputs)
+  OptionIter iter = {argv, argc, 1, 1};
+  while (!serd_option_iter_is_end(iter)) {
+    const SerdStatus st = parse_option(&iter, &opts);
+    if (st == SERD_FAILURE) {
+      return 0;
+    }
+
+    if (st) {
+      return print_usage(argv[0], true);
+    }
+  }
+
+  // Order statements to match longhand mode if necessary
+  if (opts.common.output.flags & SERD_WRITE_LONGHAND) {
+    opts.flags |= SERD_NO_TYPE_FIRST;
+  }
+
+  // Every argument past the last option is an input
+  opts.n_inputs = argc - iter.a;
+  opts.inputs   = argv + iter.a;
+  if (!opts.n_inputs) {
+    // Fall back to the single default input "-"
+    opts.inputs   = stdin_inputs;
+    opts.n_inputs = 1;
+  }
+
+  // Don't add prefixes to blank node labels if there is only one input
+  if (opts.n_inputs == 1) {
+    opts.common.input.flags |= SERD_READ_GLOBAL;
+  }
+
+  const SerdStatus result = run(opts);
+  return result > SERD_FAILURE;
+}