From 3cc5a4fcaea8f1e2fd47c53135b53f8edcb8619d Mon Sep 17 00:00:00 2001 From: David Robillard Date: Wed, 5 Apr 2023 07:01:18 -0400 Subject: Factor out and rewrite command-line interface --- NEWS | 1 + doc/man/serd-pipe.1 | 98 +++++---- test/meson.build | 43 +++- test/run_suite.py | 3 +- test/serd_test_util/__init__.py | 13 -- test/test_stdin.py | 2 +- test/trig_no_extension | 11 + test/trig_unknown_extension.n3 | 11 + tools/.clang-tidy | 1 - tools/console.c | 331 ++++++++++++++++++++++++++++-- tools/console.h | 96 ++++++++- tools/serd-pipe.c | 441 ++++++++++++++-------------------------- 12 files changed, 679 insertions(+), 372 deletions(-) create mode 100644 test/trig_no_extension create mode 100644 test/trig_unknown_extension.n3 diff --git a/NEWS b/NEWS index a222c99b..3fece2d3 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,7 @@ serd (1.1.1) unstable; urgency=medium * Make nodes opaque * Preserve anonymous graph syntax in TriG * Preserve long or short quoting from input documents + * Refine and simplify command-line interface to support new features * Remove SERD_DISABLE_DEPRECATED and SERD_DEPRECATED_BY * Remove serd_uri_to_path() * Remove support for reading Turtle named inline nodes extension diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1 index b1e01990..4d475985 100644 --- a/doc/man/serd-pipe.1 +++ b/doc/man/serd-pipe.1 @@ -8,15 +8,15 @@ .Nd read and write RDF data .Sh SYNOPSIS .Nm serd-pipe -.Op Fl Cfhqv +.Op Fl CVhq .Op Fl B Ar base .Op Fl I Ar syntax .Op Fl O Ar syntax +.Op Fl R Ar root .Op Fl b Ar bytes .Op Fl k Ar bytes -.Op Fl r Ar root +.Op Fl o Ar filename .Op Fl s Ar string -.Op Fl w Ar filename .Op Ar input ... .Sh DESCRIPTION .Nm @@ -60,12 +60,34 @@ then input is read as TriG and output is written as NQuads The options are as follows: .Bl -tag -width 3n .It Fl B Ar base -Input base URI. -Relative URI references in the input will be resolved against this. -When the input is a file, -the URI of the file is automatically used as the base URI. 
-This option can be used to override that, -or to provide a base URI for input from stdin or a string. +Base URI, path, or +.Cm rebase +to use the output path. +This is used to resolve relative URI references in the input. +.Pp +If the input is a file, +its path is used by default, +so relative paths are written as they are in the input. +The special +.Cm rebase +argument will instead use the output path set by the +.Fl o +option, +so paths are written relative to the output file. +.Pp +The distinction matters when reading from bundles of files that refer to each other. +For example, +when copying +.Pa in/manifest.ttl +to +.Pa out/manifest.ttl , +the relative URI reference +.Ql <data.ttl> +will be written as +.Ql <../in/data.ttl> +when using +.Fl B +.Cm rebase . .It Fl C Convert literals to canonical form. Literals with supported XSD datatypes will be parsed and rewritten canonically. @@ -155,26 +177,7 @@ The .Cm empty syntax suppresses the output, so that only warnings and errors will be printed. -.It Fl b Ar bytes -I/O block size. -This is the number of bytes in a file that will be read or written at once. -The default is 4096, which should perform well in most cases. -Note that this only applies to files, standard input and output are always processed one byte at a time. -.It Fl f -Fast and loose URI mode: -preserve full URIs (without qualifying or making relative), -and pass prefixed names through as-is. -.It Fl h -Print the command line options. -.It Fl k Ar bytes -Parser stack size. -Parsing is performed using a pre-allocated stack for performance and security reasons. -By default, the stack is 1 MiB, which should be sufficient for most data. -This can be increased to support unusually structured data and huge literals, -or decreased to reduce overall memory requirements and reduce startup time. -.It Fl q -Suppress all output except data. -.It Fl r Ar root +.It Fl R Ar root Keep relative URIs within a .Ar root URI. 
@@ -188,29 +191,44 @@ if .Pa /home/you/file.ttl is written to the file .Pa /home/me/output.ttl -using the destination's base URI, -then it could be written as +using +.Fl B Cm rebase , +then it will be written as .Li <../you/file.ttl> . Setting -.Fl r Li file:///home/me/ +.Fl R Pa /home/me/ would prevent references from .Dq escaping like this, so the above would instead be written as -.Li <file:///home/you/file.ttl> , -since it can't be expressed relative to the root URI. +.Li <file:///home/you/file.ttl> . .Pp This is useful for keeping relative references within some directory. -.It Fl s Ar string -Parse -.Ar string -as input. -.It Fl v +.It Fl V Display version information and exit. -.It Fl w Ar filename +.It Fl b Ar bytes +I/O block size. +This is the number of bytes in a file that will be read or written at once. +The default is 4096, which should perform well in most cases. +Note that this only applies to files, standard input and output are always processed one byte at a time. +.It Fl h +Print the command line options. +.It Fl k Ar bytes +Parser stack size. +Parsing is performed using a pre-allocated stack for performance and security reasons. +By default, the stack is 1 MiB, which should be sufficient for most data. +This can be increased to support unusually structured data and huge literals, +or decreased to reduce overall memory requirements and reduce startup time. +.It Fl o Ar filename Write output to the given .Ar filename instead of stdout. +.It Fl q +Suppress all output except data. +.It Fl s Ar string +Parse +.Ar string +as input. .El .Sh ENVIRONMENT Errors and warnings are printed in color by default if the output is a terminal. 
diff --git a/test/meson.build b/test/meson.build index 0e887ccb..43fc7211 100644 --- a/test/meson.build +++ b/test/meson.build @@ -181,32 +181,27 @@ simple_command_tests = { 'bad': [ ['-B', 'nonuriorpath'], ['-B'], - ['-I', 'turtle'], ['-I', 'unknown'], ['-I'], ['-O', 'unknown'], ['-O'], + ['-R'], ['-b', '-1'], ['-b', '1024junk'], - ['-b', '9223372036854775807'], ['-b'], - ['-k', '-1'], - ['-k', '1024junk'], ['-k', '9223372036854775807'], ['-k'], ['-qi'], - ['-r'], ['-s', ' a .'], ['-s'], - ['-w'], ['-z'], ], 'good': [ ['--help'], ['--version'], + ['-V'], ['-h'], ['-k', '512', '-s', 'a .'], - ['-v'], ], }, } @@ -236,10 +231,16 @@ if is_variable('serd_pipe') endforeach endforeach - test('none', serd_pipe, env: test_env, should_fail: true, suite: cmd_suite) - # Base URI options + test( + 'bad_rebase', + serd_pipe, + args: ['-B', 'rebase', serd_ttl], + env: test_env, + should_fail: true, + suite: cmd_suite, + ) test( 'base', files('test_base.py'), @@ -247,6 +248,13 @@ if is_variable('serd_pipe') env: test_env, suite: cmd_suite, ) + test( + 'dir_base', + serd_pipe, + args: ['-B', serd_src_root / '', serd_ttl], + env: test_env, + suite: cmd_suite, + ) # Log @@ -286,6 +294,21 @@ if is_variable('serd_pipe') input_suite = ['tools', 'pipe', 'input'] + good_input_tests = { + 'unknown_extension': [files('trig_unknown_extension.n3')], + 'no_extension': [files('trig_no_extension')], + } + + foreach name, args : good_input_tests + test( + name, + serd_pipe, + args: args, + env: test_env, + suite: input_suite, + ) + endforeach + bad_input_tests = { 'string': ['-s', ' a .'], 'no_such_file': ['no_such_file'], @@ -513,7 +536,7 @@ test_suites = { files('extra/root/manifest.ttl'), ns_serdtest + 'root/', '--', - ['-r', 'http://example.org/top/root/'], + ['-R', 'http://example.org/top/root/'], ], 'terse': [ files('extra/terse/manifest.ttl'), diff --git a/test/run_suite.py b/test/run_suite.py index 16e527af..52a418ef 100755 --- a/test/run_suite.py +++ b/test/run_suite.py @@ -40,8 +40,7 
@@ TEST_TYPES = [ def run_eval_test(command, in_path, good_path, out_path): """Run a positive eval test and return whether the output matches.""" - syntax = util.syntax_from_path(out_path) - command = command + ["-O", syntax, "-w", out_path, in_path] + command = command + ["-o", out_path, in_path] subprocess.check_call(command, encoding="utf-8") with open(good_path, "r", encoding="utf-8") as good: diff --git a/test/serd_test_util/__init__.py b/test/serd_test_util/__init__.py index f465a4b7..5f0e0033 100644 --- a/test/serd_test_util/__init__.py +++ b/test/serd_test_util/__init__.py @@ -110,19 +110,6 @@ def file_path(suite_dir, uri): return os.path.relpath(os.path.join(suite_dir, os.path.basename(uri))) -def syntax_from_path(path): - """Return the serd syntax name corresponding to a file path.""" - - extensions = { - ".ttl": "turtle", - ".nt": "ntriples", - ".trig": "trig", - ".nq": "nquads", - } - - return extensions[os.path.splitext(path)[1]] - - def earl_assertion(test, passed, asserter): """Return a Turtle description of an assertion for the test report.""" diff --git a/test/test_stdin.py b/test/test_stdin.py index 790d34ce..7a2ab34e 100755 --- a/test/test_stdin.py +++ b/test/test_stdin.py @@ -10,7 +10,7 @@ import serd_test_util as util args = util.wrapper_args(__doc__) -command = [args.tool, "-I", "ntriples", "-B", "http://example.org", "-"] +command = [args.tool, "-I", "ntriples", "-B", "http://example.org"] DOC = "<{0}s> <{0}p> <{0}o> .".format("http://example.org/") diff --git a/test/trig_no_extension b/test/trig_no_extension new file mode 100644 index 00000000..e1c292d9 --- /dev/null +++ b/test/trig_no_extension @@ -0,0 +1,11 @@ +# Copyright 2023 David Robillard +# SPDX-License-Identifier: 0BSD OR ISC + +@prefix eg: . + +eg:g { + eg:s + eg:p [ + a eg:Object + ] . 
+} diff --git a/test/trig_unknown_extension.n3 b/test/trig_unknown_extension.n3 new file mode 100644 index 00000000..e1c292d9 --- /dev/null +++ b/test/trig_unknown_extension.n3 @@ -0,0 +1,11 @@ +# Copyright 2023 David Robillard +# SPDX-License-Identifier: 0BSD OR ISC + +@prefix eg: . + +eg:g { + eg:s + eg:p [ + a eg:Object + ] . +} diff --git a/tools/.clang-tidy b/tools/.clang-tidy index f7c82d78..113631df 100644 --- a/tools/.clang-tidy +++ b/tools/.clang-tidy @@ -8,5 +8,4 @@ Checks: > -concurrency-mt-unsafe, -hicpp-signed-bitwise, -llvm-header-guard, - -readability-function-cognitive-complexity, InheritParentConfig: true diff --git a/tools/console.c b/tools/console.c index 94c6dc79..2a861d1b 100644 --- a/tools/console.c +++ b/tools/console.c @@ -3,8 +3,15 @@ #include "console.h" -#include "serd/serd.h" +#include "serd/log.h" +#include "serd/node.h" +#include "serd/stream.h" +#include "serd/string.h" +#include "serd/syntax.h" +#include "serd/uri.h" +#include "serd/version.h" #include "zix/allocator.h" +#include "zix/attributes.h" #include "zix/filesystem.h" #ifdef _WIN32 @@ -15,9 +22,86 @@ # include #endif +#include +#include +#include #include +#include #include +#define MAX_DEPTH 128U + +ZIX_PURE_FUNC bool +serd_option_iter_is_end(const OptionIter iter) +{ + return iter.a >= iter.argc || iter.argv[iter.a][0] != '-' || + !iter.argv[iter.a][iter.f]; +} + +SerdStatus +serd_option_iter_advance(OptionIter* const iter) +{ + if (!iter->argv[iter->a][++iter->f]) { + ++iter->a; + iter->f = 1; + } + + return SERD_SUCCESS; +} + +SerdStatus +serd_tool_setup(SerdTool* const tool, + const char* const program, + SerdCommonOptions options) +{ + // Open the output first, since if that fails we have nothing to do + const char* const out_path = options.out_filename; + if (!((tool->out = serd_open_tool_output(out_path)).stream)) { + fprintf(stderr, + "%s: failed to open output file (%s)\n", + program, + strerror(errno)); + return SERD_BAD_STREAM; + } + + // We have something to 
write to, so build the writing environment + const SerdLimits limits = {options.stack_size, MAX_DEPTH}; + if (!(tool->world = serd_world_new(NULL)) || + serd_world_set_limits(tool->world, limits) || + !(tool->env = serd_create_env( + NULL, program, options.base_uri, options.out_filename)) || + !(tool->writer = serd_writer_new( + tool->world, + serd_choose_syntax( + tool->world, options.output, options.out_filename, SERD_NQUADS), + options.output.flags, + tool->env, + &tool->out, + options.block_size))) { + fprintf(stderr, "%s: failed to set up writing environment\n", program); + return SERD_UNKNOWN_ERROR; + } + + return SERD_SUCCESS; +} + +SerdStatus +serd_tool_cleanup(SerdTool tool) +{ + SerdStatus st = SERD_SUCCESS; + if (tool.out.stream) { + // Close the output stream explicitly to check if there were any errors + if ((st = serd_close_output(&tool.out))) { + perror("write error"); + } + } + + serd_writer_free(tool.writer); + serd_env_free(tool.env); + serd_world_free(tool.world); + return st; +} + void serd_set_stream_utf8_mode(FILE* const stream) { @@ -28,7 +112,7 @@ serd_set_stream_utf8_mode(FILE* const stream) #endif } -int +SerdStatus serd_print_version(const char* const program) { printf("%s %d.%d.%d \n", @@ -42,34 +126,90 @@ serd_print_version(const char* const program) "This is free software; you are free to change and redistribute it.\n" "There is NO WARRANTY, to the extent permitted by law.\n"); - return 0; + return SERD_FAILURE; +} + +SerdStatus +serd_get_argument(OptionIter* const iter, const char** const argument) +{ + const char flag = iter->argv[iter->a][iter->f++]; + + if (iter->argv[iter->a][iter->f] || (iter->a + 1) == iter->argc) { + fprintf( + stderr, "%s: option requires an argument -- %c\n", iter->argv[0], flag); + return SERD_BAD_ARG; + } + + *argument = iter->argv[++iter->a]; + ++iter->a; + iter->f = 1; + return SERD_SUCCESS; +} + +SerdStatus +serd_get_size_argument(OptionIter* const iter, size_t* const argument) +{ + SerdStatus st = 
SERD_SUCCESS; + const char* string = NULL; + if ((st = serd_get_argument(iter, &string))) { + return st; + } + + char* endptr = NULL; + const long size = strtol(string, &endptr, 10); + if (size <= 0 || size == LONG_MAX || *endptr != '\0') { + return SERD_BAD_ARG; + } + + *argument = (size_t)size; + return SERD_SUCCESS; } SerdStatus serd_set_base_uri_from_path(SerdEnv* const env, const char* const path) { - char* const input_path = zix_canonical_path(NULL, path); - if (!input_path) { + const size_t path_len = strlen(path); + char* const real_path = zix_canonical_path(NULL, path); + if (!real_path) { return SERD_BAD_ARG; } - SerdNode* const file_uri = serd_node_new( - NULL, serd_a_file_uri(serd_string(input_path), serd_empty_string())); + const size_t real_path_len = strlen(real_path); + SerdNode* base_node = NULL; + if (path[path_len - 1] == '/' || path[path_len - 1] == '\\') { + char* const base_path = (char*)calloc(real_path_len + 2, 1); + + memcpy(base_path, real_path, real_path_len + 1); + base_path[real_path_len] = path[path_len - 1]; - serd_env_set_base_uri(env, serd_node_string_view(file_uri)); - serd_node_free(NULL, file_uri); - zix_free(NULL, input_path); + base_node = serd_node_new( + NULL, serd_a_file_uri(serd_string(base_path), serd_empty_string())); + + free(base_path); + } else { + base_node = serd_node_new( + NULL, serd_a_file_uri(serd_string(real_path), serd_empty_string())); + } + + serd_env_set_base_uri(env, serd_node_string_view(base_node)); + serd_node_free(NULL, base_node); + zix_free(NULL, real_path); return SERD_SUCCESS; } SerdSyntax -serd_choose_syntax(SerdWorld* const world, - const SerdSyntax requested, - const char* const filename) +serd_choose_syntax(SerdWorld* const world, + const SerdSyntaxOptions options, + const char* const filename, + const SerdSyntax fallback) { - if (requested) { - return requested; + if (options.overridden || options.syntax != SERD_SYNTAX_EMPTY) { + return options.syntax; + } + + if (!filename || !strcmp(filename, 
"-")) { + return fallback; } const SerdSyntax guessed = serd_guess_syntax(filename); @@ -118,7 +258,26 @@ serd_set_input_option(const SerdStringView name, } } - return SERD_FAILURE; + return SERD_BAD_ARG; +} + +SerdStatus +serd_parse_input_argument(OptionIter* const iter, + SerdSyntaxOptions* const options) +{ + SerdStatus st = SERD_SUCCESS; + const char* argument = NULL; + + if (!(st = serd_get_argument(iter, &argument))) { + if ((st = serd_set_input_option( + serd_string(argument), &options->syntax, &options->flags))) { + fprintf(stderr, "%s: unknown option \"%s\"\n", iter->argv[0], argument); + } else if (!strcmp(argument, "empty") || options->syntax) { + options->overridden = true; + } + } + + return st; } SerdStatus @@ -154,9 +313,87 @@ serd_set_output_option(const SerdStringView name, } } + return SERD_BAD_ARG; +} + +SerdStatus +serd_parse_output_argument(OptionIter* const iter, + SerdSyntaxOptions* const options) +{ + SerdStatus st = SERD_SUCCESS; + const char* argument = NULL; + + if (!(st = serd_get_argument(iter, &argument))) { + if ((st = serd_set_output_option( + serd_string(argument), &options->syntax, &options->flags))) { + fprintf(stderr, "%s: unknown option \"%s\"\n", iter->argv[0], argument); + } else if (!strcmp(argument, "empty") || options->syntax) { + options->overridden = true; + } + } + + return st; +} + +SerdStatus +serd_parse_common_option(OptionIter* const iter, SerdCommonOptions* const opts) +{ + const char opt = iter->argv[iter->a][iter->f]; + switch (opt) { + case 'B': + return serd_get_argument(iter, &opts->base_uri); + + case 'I': + return serd_parse_input_argument(iter, &opts->input); + + case 'O': + return serd_parse_output_argument(iter, &opts->output); + + case 'b': + return serd_get_size_argument(iter, &opts->block_size); + + case 'k': + return serd_get_size_argument(iter, &opts->stack_size); + + case 'o': + return serd_get_argument(iter, &opts->out_filename); + + default: + break; + } + return SERD_FAILURE; } +SerdEnv* 
+serd_create_env(SerdAllocator* const allocator, + const char* const program, + const char* const base_string, + const char* const out_filename) +{ + const bool is_rebase = base_string && !strcmp(base_string, "rebase"); + if (is_rebase && !out_filename) { + fprintf(stderr, "%s: rebase requires an output filename\n", program); + return NULL; + } + + if (base_string && serd_uri_string_has_scheme(base_string)) { + return serd_env_new(allocator, serd_string(base_string)); + } + + SerdEnv* const env = serd_env_new(allocator, serd_empty_string()); + if (base_string && base_string[0]) { + const SerdStatus st = serd_set_base_uri_from_path(env, base_string); + if (st) { + fprintf(stderr, "%s: invalid base URI \"%s\"\n", program, base_string); + serd_env_free(env); + return NULL; + } + } + + return env; +} + /// Wrapper for getc that is compatible with SerdReadFunc but faster than fread static size_t serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) @@ -201,3 +438,65 @@ serd_open_tool_output(const char* const filename) return serd_open_output_file(filename); } + +SerdStatus +serd_read_source(SerdWorld* const world, + const SerdCommonOptions opts, + SerdEnv* const env, + const SerdSyntax syntax, + SerdInputStream* const in, + const char* const name, + const SerdSink* const sink) +{ + SerdReader* const reader = + serd_reader_new(world, syntax, opts.input.flags, env, sink); + + SerdNode* const name_node = serd_node_new(NULL, serd_a_string(name)); + SerdStatus st = serd_reader_start(reader, in, name_node, opts.block_size); + serd_node_free(NULL, name_node); + if (!st) { + st = serd_reader_read_document(reader); + } + + serd_reader_free(reader); + return st; +} + +SerdStatus +serd_read_inputs(SerdWorld* const world, + const SerdCommonOptions opts, + SerdEnv* const env, + const intptr_t n_inputs, + char* const* const inputs, + const SerdSink* const sink) +{ + SerdStatus st = SERD_SUCCESS; + + for (intptr_t i = 0; !st && i < n_inputs; ++i) { + // Use the 
filename as the base URI if possible if user didn't override it + const char* const in_path = inputs[i]; + if (!opts.base_uri[0] && !!strcmp(in_path, "-")) { + serd_set_base_uri_from_path(env, in_path); + } + + // Open the input stream + SerdInputStream in = serd_open_tool_input(in_path); + if (!in.stream) { + return SERD_BAD_ARG; + } + + // Read the entire file + st = serd_read_source( + world, + opts, + env, + serd_choose_syntax(world, opts.input, in_path, SERD_TRIG), + &in, + !strcmp(in_path, "-") ? "stdin" : in_path, + sink); + + serd_close_input(&in); + } + + return st; +} diff --git a/tools/console.h b/tools/console.h index a7e8423f..d475aebc 100644 --- a/tools/console.h +++ b/tools/console.h @@ -6,44 +6,132 @@ #include "serd/env.h" #include "serd/input_stream.h" +#include "serd/memory.h" #include "serd/output_stream.h" #include "serd/reader.h" +#include "serd/sink.h" #include "serd/status.h" #include "serd/string_view.h" #include "serd/syntax.h" #include "serd/world.h" #include "serd/writer.h" +#include +#include #include +// Iterator over command-line options with support for BSD-style flag merging +typedef struct { + char* const* argv; ///< Complete argument vector (from main) + int argc; ///< Total number of arguments (from main) + int a; ///< Argument index (index into argv) + int f; ///< Flag index (offset in argv[arg]) +} OptionIter; + +// Options for the input or output syntax +typedef struct { + SerdSyntax syntax; ///< User-specified syntax, or empty + uint32_t flags; ///< SerdReaderFlags or SerdWriterFlags + bool overridden; ///< True if syntax was explicitly given +} SerdSyntaxOptions; + +// Options common to all command-line tools +typedef struct { + const char* base_uri; + const char* out_filename; + size_t block_size; + size_t stack_size; + SerdSyntaxOptions input; + SerdSyntaxOptions output; +} SerdCommonOptions; + +// Common "global" state of a command-line tool that writes data +typedef struct { + SerdOutputStream out; + SerdWorld* world; + 
SerdEnv* env; + SerdWriter* writer; +} SerdTool; + +bool +serd_option_iter_is_end(OptionIter iter); + +SerdStatus +serd_option_iter_advance(OptionIter* iter); + +SerdStatus +serd_tool_setup(SerdTool* tool, const char* program, SerdCommonOptions options); + +SerdStatus +serd_tool_cleanup(SerdTool tool); + void serd_set_stream_utf8_mode(FILE* stream); -int +SerdStatus serd_print_version(const char* program); SerdStatus serd_set_base_uri_from_path(SerdEnv* env, const char* path); SerdSyntax -serd_choose_syntax(SerdWorld* world, - SerdSyntax requested, - const char* filename); +serd_choose_syntax(SerdWorld* world, + SerdSyntaxOptions options, + const char* filename, + SerdSyntax fallback); + +SerdStatus +serd_get_argument(OptionIter* iter, const char** argument); + +SerdStatus +serd_get_size_argument(OptionIter* iter, size_t* argument); SerdStatus serd_set_input_option(SerdStringView name, SerdSyntax* syntax, SerdReaderFlags* flags); +SerdStatus +serd_parse_input_argument(OptionIter* iter, SerdSyntaxOptions* options); + SerdStatus serd_set_output_option(SerdStringView name, SerdSyntax* syntax, SerdWriterFlags* flags); +SerdStatus +serd_parse_output_argument(OptionIter* iter, SerdSyntaxOptions* options); + +SerdStatus +serd_parse_common_option(OptionIter* iter, SerdCommonOptions* opts); + +SerdEnv* +serd_create_env(SerdAllocator* allocator, + const char* program, + const char* base_string, + const char* out_filename); + SerdInputStream serd_open_tool_input(const char* filename); SerdOutputStream serd_open_tool_output(const char* filename); +SerdStatus +serd_read_source(SerdWorld* world, + SerdCommonOptions opts, + SerdEnv* env, + SerdSyntax syntax, + SerdInputStream* in, + const char* name, + const SerdSink* sink); + +SerdStatus +serd_read_inputs(SerdWorld* world, + SerdCommonOptions opts, + SerdEnv* env, + intptr_t n_inputs, + char* const* inputs, + const SerdSink* sink); + #endif // SERD_TOOLS_CONSOLE_H diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c index 
66600006..bbed9fa8 100644 --- a/tools/serd-pipe.c +++ b/tools/serd-pipe.c @@ -4,344 +4,215 @@ #include "console.h" #include "serd/canon.h" -#include "serd/env.h" #include "serd/input_stream.h" #include "serd/log.h" -#include "serd/node.h" -#include "serd/output_stream.h" #include "serd/reader.h" #include "serd/sink.h" #include "serd/status.h" #include "serd/string_view.h" #include "serd/syntax.h" -#include "serd/world.h" #include "serd/writer.h" -#include "zix/allocator.h" -#include "zix/filesystem.h" -#include -#include #include +#include #include -#include #include -#define SERDI_ERROR(msg) fprintf(stderr, "serd-pipe: " msg) -#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serd-pipe: " fmt, __VA_ARGS__) +/* Application (after parsing command-line arguments) */ -#define MAX_DEPTH 128U - -static int -print_usage(const char* const name, const bool error) -{ - static const char* const description = - "Read and write RDF syntax.\n" - "Use - for INPUT to read from standard input.\n\n" - " -B BASE_URI Base URI.\n" - " -C Convert literals to canonical form.\n" - " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n" - " or flag (lax/variables/verbatim).\n" - " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n" - " or flag (ascii/expanded/verbatim/terse/lax).\n" - " -b BYTES I/O block size.\n" - " -h Display this help and exit.\n" - " -k BYTES Parser stack size.\n" - " -q Suppress all output except data.\n" - " -r ROOT_URI Keep relative URIs within ROOT_URI.\n" - " -s STRING Parse STRING as input.\n" - " -v Display version information and exit.\n" - " -w FILENAME Write output to FILENAME instead of stdout.\n"; - - FILE* const os = error ? stderr : stdout; - fprintf(os, "%s", error ? "\n" : ""); - fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); - fprintf(os, "%s", description); - return error ? 
1 : 0; -} - -static int -missing_arg(const char* const name, const char opt) -{ - SERDI_ERRORF("option requires an argument -- '%c'\n", opt); - return print_usage(name, true); -} +// All options +typedef struct { + SerdCommonOptions common; + const char* root_uri; + const char* input_string; + char* const* inputs; + intptr_t n_inputs; + bool canonical; + bool quiet; +} Options; +// Run the tool using the given options static SerdStatus -read_file(SerdWorld* const world, - const SerdSyntax syntax, - const SerdReaderFlags flags, - SerdEnv* const env, - const SerdSink* const sink, - const size_t stack_size, - const char* const filename, - const size_t block_size) +run(const Options opts) { - SerdInputStream in = serd_open_tool_input(filename); - if (!in.stream) { - SERDI_ERRORF( - "failed to open input file `%s' (%s)\n", filename, strerror(errno)); + SerdTool app = {{NULL, NULL, NULL, NULL}, NULL, NULL, NULL}; - return SERD_BAD_STREAM; + // Set up the writing environment + SerdStatus st = SERD_SUCCESS; + if ((st = serd_tool_setup(&app, "serd-pipe", opts.common))) { + serd_tool_cleanup(app); + return st; } - const SerdLimits limits = {stack_size, MAX_DEPTH}; - serd_world_set_limits(world, limits); + if (opts.quiet) { + serd_set_log_func(app.world, serd_quiet_log_func, NULL); + } - SerdReader* reader = serd_reader_new(world, syntax, flags, env, sink); - SerdStatus st = serd_reader_start(reader, &in, NULL, block_size); + serd_writer_set_root_uri(app.writer, serd_string(opts.root_uri)); - st = st ? 
st : serd_reader_read_document(reader); + // Set up the output pipeline: [canon] -> writer + const SerdSink* const target = serd_writer_sink(app.writer); + const SerdSink* sink = target; + SerdSink* canon = NULL; + if (opts.canonical) { + canon = serd_canon_new(app.world, target, opts.common.input.flags); + sink = canon; + } - serd_reader_free(reader); - serd_close_input(&in); + if (opts.input_string) { + const char* position = opts.input_string; + SerdInputStream in = serd_open_input_string(&position); - return st; -} + st = serd_read_source( + app.world, + opts.common, + app.env, + serd_choose_syntax(app.world, opts.common.input, NULL, SERD_TRIG), + &in, + "string", + sink); -int -main(int argc, char** argv) -{ - const char* const prog = argv[0]; - - SerdNode* base = NULL; - SerdSyntax input_syntax = SERD_SYNTAX_EMPTY; - SerdSyntax output_syntax = SERD_SYNTAX_EMPTY; - SerdReaderFlags reader_flags = 0; - SerdWriterFlags writer_flags = 0; - bool osyntax_set = false; - bool canonical = false; - bool quiet = false; - size_t block_size = 4096U; - size_t stack_size = 1048576U; - const char* input_string = NULL; - const char* root_uri = NULL; - const char* out_filename = NULL; - int a = 1; - for (; a < argc && argv[a][0] == '-'; ++a) { - if (argv[a][1] == '\0') { - break; - } + serd_close_input(&in); + } - if (!strcmp(argv[a], "--help")) { - return print_usage(prog, false); - } + // Read all the inputs, which drives the writer to emit the output + if (st || + (st = serd_read_inputs( + app.world, opts.common, app.env, opts.n_inputs, opts.inputs, sink)) || + (st = serd_writer_finish(app.writer))) { + serd_sink_free(canon); + serd_tool_cleanup(app); + return st; + } - if (!strcmp(argv[a], "--version")) { - return serd_print_version(argv[0]); - } + serd_sink_free(canon); + return serd_tool_cleanup(app); +} - for (int o = 1; argv[a][o]; ++o) { - const char opt = argv[a][o]; - - if (opt == 'C') { - canonical = true; - } else if (opt == 'h') { - return print_usage(prog, 
false); - } else if (opt == 'q') { - quiet = true; - } else if (opt == 'v') { - return serd_print_version(argv[0]); - } else if (argv[a][1] == 'B') { - if (++a == argc) { - return missing_arg(prog, 'B'); - } - - base = serd_node_new(NULL, serd_a_uri_string(argv[a])); - break; - } else if (opt == 'I') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'I'); - } - - if (serd_set_input_option( - serd_string(argv[a]), &input_syntax, &reader_flags)) { - return print_usage(argv[0], true); - } - break; - } else if (opt == 'O') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'O'); - } - - if (serd_set_output_option( - serd_string(argv[a]), &output_syntax, &writer_flags)) { - return print_usage(argv[0], true); - } - - osyntax_set = - output_syntax != SERD_SYNTAX_EMPTY || !strcmp(argv[a], "empty"); - - break; - } else if (opt == 'b') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'b'); - } - - char* endptr = NULL; - const long size = strtol(argv[a], &endptr, 10); - if (size < 1 || size == LONG_MAX || *endptr != '\0') { - SERDI_ERRORF("invalid block size `%s'\n", argv[a]); - return 1; - } - block_size = (size_t)size; - break; - } else if (opt == 'k') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'k'); - } - - char* endptr = NULL; - const long size = strtol(argv[a], &endptr, 10); - if (size <= 0 || size == LONG_MAX || *endptr != '\0') { - SERDI_ERRORF("invalid stack size '%s'\n", argv[a]); - return 1; - } - stack_size = (size_t)size; - break; - } else if (opt == 'r') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'r'); - } - - root_uri = argv[a]; - break; - } else if (opt == 's') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 's'); - } - - input_string = argv[a]; - break; - } else if (opt == 'w') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(argv[0], 'w'); - } - - out_filename = argv[a]; - break; - } else { - SERDI_ERRORF("invalid option -- 
'%s'\n", argv[a] + 1); - return print_usage(prog, true); - } - } - } +/* Command-line interface (before setting up serd) */ - if (a == argc && !input_string) { - SERDI_ERROR("missing input\n"); - return print_usage(prog, true); - } +static int +print_usage(const char* const name, const bool error) +{ + static const char* const description = + "Read and write RDF data.\n" + "INPUT can be a local filename, or \"-\" to read from standard input.\n\n" + " -B BASE_URI Base URI or path for resolving relative references.\n" + " -C Convert literals to canonical form.\n" + " -I SYNTAX Input syntax turtle/ntriples/trig/nquads, or option\n" + " lax/variables/relative/global/generated.\n" + " -O SYNTAX Output syntax empty/turtle/ntriples/nquads, or option\n" + " ascii/expanded/verbatim/terse/lax.\n" + " -R ROOT_URI Keep relative URIs within ROOT_URI.\n" + " -V Display version information and exit.\n" + " -b BYTES I/O block size.\n" + " -h Display this help and exit.\n" + " -k BYTES Parser stack size.\n" + " -o FILENAME Write output to FILENAME instead of stdout.\n" + " -q Suppress warning and error output.\n" + " -s STRING Parse STRING as input.\n"; - serd_set_stream_utf8_mode(stdin); + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... [INPUT]...\n", name); + fprintf(os, "%s", description); + return error; +} - char* const* const inputs = argv + a; - const int n_inputs = argc - a; +// Parse the option pointed to by `iter`, and advance it to the next one +static SerdStatus +parse_option(OptionIter* const iter, Options* const opts) +{ +#define ARG_ERRORF(fmt, ...) 
\ + fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__) - bool input_has_graphs = serd_syntax_has_graphs(input_syntax); - for (int i = a; i < argc; ++i) { - if (serd_syntax_has_graphs(serd_guess_syntax(argv[i]))) { - input_has_graphs = true; - break; - } + SerdStatus st = serd_parse_common_option(iter, &opts->common); + if (st != SERD_FAILURE) { + return st; } - if (!output_syntax && !osyntax_set) { - output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES; + if (!strcmp(iter->argv[iter->a], "--help")) { + print_usage(iter->argv[0], false); + return SERD_FAILURE; } - if (!base && n_inputs == 1 && - (output_syntax == SERD_NQUADS || output_syntax == SERD_NTRIPLES)) { - // Choose base URI from the single input path - char* const input_path = zix_canonical_path(NULL, inputs[0]); - if (!input_path || - !(base = serd_node_new( - NULL, - serd_a_file_uri(serd_string(input_path), serd_empty_string())))) { - SERDI_ERRORF("unable to determine base URI from path %s\n", inputs[0]); - } - zix_free(NULL, input_path); + if (!strcmp(iter->argv[iter->a], "--version")) { + return serd_print_version(iter->argv[0]); } - SerdWorld* const world = serd_world_new(NULL); - const SerdLimits limits = {stack_size, MAX_DEPTH}; - serd_world_set_limits(world, limits); - - SerdEnv* const env = serd_env_new( - NULL, base ? 
serd_node_string_view(base) : serd_empty_string()); + const char opt = iter->argv[iter->a][iter->f]; + switch (opt) { + case 'C': + opts->canonical = true; + return serd_option_iter_advance(iter); - SerdOutputStream out = serd_open_tool_output(out_filename); - if (!out.stream) { - perror("serdi: error opening output file"); - return 1; - } + case 'R': + return serd_get_argument(iter, &opts->root_uri); - SerdWriter* const writer = - serd_writer_new(world, output_syntax, writer_flags, env, &out, block_size); + case 'V': + return serd_print_version("serd-pipe"); - const SerdSink* sink = serd_writer_sink(writer); + case 'h': + print_usage(iter->argv[0], false); + return SERD_FAILURE; - SerdSink* canon = NULL; - if (canonical) { - sink = canon = serd_canon_new(world, sink, reader_flags); - } + case 'q': + opts->quiet = true; + return serd_option_iter_advance(iter); - if (quiet) { - serd_set_log_func(world, serd_quiet_log_func, NULL); - } + case 's': + return serd_get_argument(iter, &opts->input_string); - if (root_uri) { - serd_writer_set_root_uri(writer, serd_string(root_uri)); + default: + break; } - SerdStatus st = SERD_SUCCESS; - if (input_string) { - const char* position = input_string; - SerdInputStream string_in = serd_open_input_string(&position); + ARG_ERRORF("invalid option -- '%c'\n", opt); + return SERD_BAD_ARG; - SerdReader* const reader = serd_reader_new( - world, input_syntax ? 
input_syntax : SERD_TRIG, reader_flags, env, sink); +#undef ARG_ERRORF +} - if (!(st = serd_reader_start(reader, &string_in, NULL, 1U))) { - st = serd_reader_read_document(reader); +int +main(const int argc, char* const* const argv) +{ + char default_input[] = {'-', '\0'}; + char* default_inputs[] = {default_input}; + + Options opts = {{"", + NULL, + 4096U, + 1048576U, + {SERD_SYNTAX_EMPTY, 0U, false}, + {SERD_SYNTAX_EMPTY, 0U, false}}, + "", + NULL, + NULL, + 0U, + false, + false}; + + // Parse all command line options (which must precede inputs) + SerdStatus st = SERD_SUCCESS; + OptionIter iter = {argv, argc, 1, 1}; + while (!serd_option_iter_is_end(iter)) { + if ((st = parse_option(&iter, &opts))) { + return (st == SERD_FAILURE) ? 0 : print_usage(argv[0], true); } - - serd_reader_free(reader); - serd_close_input(&string_in); - } - - if (n_inputs == 1) { - reader_flags |= SERD_READ_GLOBAL; } - for (int i = 0; !st && i < n_inputs; ++i) { - if (!base && !!strcmp(inputs[i], "-")) { - if ((st = serd_set_base_uri_from_path(env, inputs[i]))) { - SERDI_ERRORF("failed to set base URI from path %s\n", inputs[i]); - break; - } - } - - if ((st = read_file(world, - serd_choose_syntax(world, input_syntax, inputs[i]), - reader_flags, - env, - sink, - stack_size, - inputs[i], - block_size))) { - break; - } + // Every argument past the last option is an input + opts.inputs = argv + iter.a; + opts.n_inputs = argc - iter.a; + if (opts.n_inputs + (bool)opts.input_string == 0) { + opts.n_inputs = 1; + opts.inputs = default_inputs; } - serd_sink_free(canon); - serd_writer_free(writer); - serd_env_free(env); - serd_node_free(NULL, base); - serd_world_free(world); - - if (serd_close_output(&out)) { - perror("serd-pipe: write error"); - st = SERD_BAD_STREAM; + // Don't add prefixes to blank node labels if there is only one input + if (opts.n_inputs + (bool)opts.input_string == 1) { + opts.common.input.flags |= SERD_READ_GLOBAL; } - return (st > SERD_FAILURE) ? 
1 : 0; + return run(opts) > SERD_FAILURE; } -- cgit v1.2.1