diff options
author | David Robillard <d@drobilla.net> | 2021-10-21 15:38:10 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2022-01-28 21:57:07 -0500 |
commit | b404312686874e539b617d1f27ccbaa5a82936af (patch) | |
tree | c2fdb2cc046e6da53071629cd1750dcc327e6cd9 /tools | |
parent | d4aec28ba8ad24d5aef3ee12beeb1b805148eab1 (diff) | |
download | serd-b404312686874e539b617d1f27ccbaa5a82936af.tar.gz serd-b404312686874e539b617d1f27ccbaa5a82936af.tar.bz2 serd-b404312686874e539b617d1f27ccbaa5a82936af.zip |
Replace serdi with more fine-grained tools
Especially with the new functionality, the complexity of the command-line
interface alone was really becoming unmanageable. The serdi implementation
also had the highest cyclomatic complexity of the entire codebase by a huge
margin.
So, take a page from the Unix philosophy and split serdi into several more
finely-honed tools that can be freely composed. Though there is still
unfortunately quite a bit of option overlap between them due to the common
details of reading RDF, I think the resulting tools are a lot easier to
understand, from both a user's and a developer's perspective.
Diffstat (limited to 'tools')
-rw-r--r-- | tools/console.c | 288 | ||||
-rw-r--r-- | tools/console.h | 104 | ||||
-rw-r--r-- | tools/meson.build | 32 | ||||
-rw-r--r-- | tools/serd-filter.c | 287 | ||||
-rw-r--r-- | tools/serd-pipe.c | 209 | ||||
-rw-r--r-- | tools/serd-sort.c | 274 | ||||
-rw-r--r-- | tools/serdi.c | 502 |
7 files changed, 1168 insertions, 528 deletions
diff --git a/tools/console.c b/tools/console.c index ea5fd7ee..f1e78d75 100644 --- a/tools/console.c +++ b/tools/console.c @@ -26,9 +26,64 @@ # include <io.h> #endif +#include <errno.h> +#include <limits.h> #include <stdint.h> +#include <stdlib.h> #include <string.h> +SerdStatus +serd_tool_setup(SerdTool* const tool, + const char* const program, + SerdCommonOptions options) +{ + // Open the output first, since if that fails we have nothing to do + const char* const out_path = options.out_filename; + if (!(tool->out = serd_open_output(out_path, options.block_size))) { + fprintf(stderr, + "%s: failed to open output file (%s)\n", + program, + strerror(errno)); + return SERD_ERR_UNKNOWN; + } + + // We have something to write to, so build the writing environment + if (!(tool->world = serd_world_new()) || + !(tool->env = + serd_create_env(program, options.base_uri, options.out_filename)) || + !(tool->writer = serd_writer_new( + tool->world, + serd_choose_syntax( + tool->world, options.output, options.out_filename, SERD_NQUADS), + options.output.flags, + tool->env, + tool->out))) { + fprintf(stderr, "%s: failed to set up writing environment\n", program); + return SERD_ERR_INTERNAL; + } + + return SERD_SUCCESS; +} + +SerdStatus +serd_tool_cleanup(const SerdTool tool) +{ + SerdStatus st = SERD_SUCCESS; + if (tool.out) { + // Close the output stream explicitly to check if there were any errors + if (serd_byte_sink_close(tool.out)) { + perror("write error"); + st = SERD_ERR_BAD_WRITE; + } + } + + serd_writer_free(tool.writer); + serd_env_free(tool.env); + serd_world_free(tool.world); + serd_byte_sink_free(tool.out); + return st; +} + void serd_set_stream_utf8_mode(FILE* const stream) { @@ -39,7 +94,7 @@ serd_set_stream_utf8_mode(FILE* const stream) #endif } -int +SerdStatus serd_print_version(const char* const program) { printf("%s %d.%d.%d <http://drobilla.net/software/serd>\n", @@ -53,7 +108,43 @@ serd_print_version(const char* const program) "This is free software; you are 
free to change and redistribute it.\n" "There is NO WARRANTY, to the extent permitted by law.\n"); - return 0; + return SERD_FAILURE; +} + +SerdStatus +serd_get_argument(OptionIter* const iter, const char** const argument) +{ + const char flag = iter->argv[iter->a][iter->f++]; + + if (iter->argv[iter->a][iter->f] || (iter->a + 1) == iter->argc) { + fprintf( + stderr, "%s: option requires an argument -- %c\n", iter->argv[0], flag); + return SERD_ERR_BAD_ARG; + } + + *argument = iter->argv[++iter->a]; + ++iter->a; + iter->f = 1; + return SERD_SUCCESS; +} + +SerdStatus +serd_get_size_argument(OptionIter* const iter, size_t* const argument) +{ + SerdStatus st = SERD_SUCCESS; + const char* string = NULL; + if ((st = serd_get_argument(iter, &string))) { + return st; + } + + char* endptr = NULL; + const long size = strtol(string, &endptr, 10); + if (size <= 0 || size == LONG_MAX || *endptr != '\0') { + return SERD_ERR_BAD_ARG; + } + + *argument = (size_t)size; + return SERD_SUCCESS; } SerdStatus @@ -89,8 +180,26 @@ serd_set_input_option(const SerdStringView name, } } - // SERDI_ERRORF("invalid input option `%s'\n", name.buf); - return SERD_FAILURE; + return SERD_ERR_BAD_ARG; +} + +SerdStatus +serd_parse_input_argument(OptionIter* const iter, + SerdSyntaxOptions* const options) +{ + SerdStatus st = SERD_SUCCESS; + const char* argument = NULL; + + if (!(st = serd_get_argument(iter, &argument))) { + if ((st = serd_set_input_option( + SERD_STRING(argument), &options->syntax, &options->flags))) { + fprintf(stderr, "%s: unknown option \"%s\"\n", iter->argv[0], argument); + } else if (!strcmp(argument, "empty") || options->syntax) { + options->overridden = true; + } + } + + return st; } SerdStatus @@ -126,16 +235,90 @@ serd_set_output_option(const SerdStringView name, } } + return SERD_ERR_BAD_ARG; +} + +SerdStatus +serd_parse_output_argument(OptionIter* const iter, + SerdSyntaxOptions* const options) +{ + SerdStatus st = SERD_SUCCESS; + const char* argument = NULL; + + if (!(st 
= serd_get_argument(iter, &argument))) { + if ((st = serd_set_output_option( + SERD_STRING(argument), &options->syntax, &options->flags))) { + fprintf(stderr, "%s: unknown option \"%s\"\n", iter->argv[0], argument); + } else if (!strcmp(argument, "empty") || options->syntax) { + options->overridden = true; + } + } + + return st; +} + +SerdStatus +serd_parse_common_option(OptionIter* const iter, SerdCommonOptions* const opts) +{ + const char opt = iter->argv[iter->a][iter->f]; + switch (opt) { + case 'B': + return serd_get_argument(iter, &opts->base_uri); + + case 'I': + return serd_parse_input_argument(iter, &opts->input); + + case 'O': + return serd_parse_output_argument(iter, &opts->output); + + case 'b': + return serd_get_size_argument(iter, &opts->block_size); + + case 'k': + return serd_get_size_argument(iter, &opts->stack_size); + + case 'o': + return serd_get_argument(iter, &opts->out_filename); + + default: + break; + } + return SERD_FAILURE; } +SerdEnv* +serd_create_env(const char* const program, + const char* const base_string, + const char* const out_filename) +{ + const bool is_rebase = base_string && !strcmp(base_string, "rebase"); + if (is_rebase && !out_filename) { + fprintf(stderr, "%s: rebase requires an output filename\n", program); + return NULL; + } + + if (base_string && serd_uri_string_has_scheme(base_string)) { + return serd_env_new(SERD_STRING(base_string)); + } + + SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING()); + serd_set_base_uri_from_path(env, is_rebase ? 
out_filename : base_string); + return env; +} + SerdSyntax -serd_choose_input_syntax(SerdWorld* const world, - const SerdSyntax requested, - const char* const filename) +serd_choose_syntax(SerdWorld* const world, + const SerdSyntaxOptions options, + const char* const filename, + const SerdSyntax fallback) { - if (requested) { - return requested; + if (options.overridden || options.syntax != SERD_SYNTAX_EMPTY) { + return options.syntax; + } + + if (!filename || !strcmp(filename, "-")) { + return fallback; } const SerdSyntax guessed = serd_guess_syntax(filename); @@ -202,17 +385,90 @@ serd_open_output(const char* const filename, const size_t block_size) SerdStatus serd_set_base_uri_from_path(SerdEnv* const env, const char* const path) { - char* const input_path = serd_canonical_path(path); - if (!input_path) { + const size_t path_len = path ? strlen(path) : 0u; + if (!path_len) { return SERD_ERR_BAD_ARG; } - SerdNode* const file_uri = - serd_new_file_uri(SERD_STRING(input_path), SERD_EMPTY_STRING()); + char* const real_path = serd_canonical_path(path); + if (!real_path) { + return SERD_ERR_BAD_ARG; + } + + const size_t real_path_len = strlen(real_path); + SerdNode* base_node = NULL; + if (path[path_len - 1] == '/' || path[path_len - 1] == '\\') { + char* const base_path = (char*)calloc(real_path_len + 2, 1); + memcpy(base_path, real_path, real_path_len); + base_path[real_path_len] = path[path_len - 1]; + + base_node = serd_new_file_uri(SERD_STRING(base_path), SERD_EMPTY_STRING()); + free(base_path); + } else { + base_node = serd_new_file_uri(SERD_STRING(real_path), SERD_EMPTY_STRING()); + } - serd_env_set_base_uri(env, serd_node_string_view(file_uri)); - serd_node_free(file_uri); - serd_free(input_path); + serd_env_set_base_uri(env, serd_node_string_view(base_node)); + serd_node_free(base_node); + serd_free(real_path); return SERD_SUCCESS; } + +SerdStatus +serd_read_source(SerdWorld* const world, + const SerdCommonOptions opts, + SerdEnv* const env, + const 
SerdSyntax syntax, + SerdByteSource* const in, + const SerdSink* const sink) +{ + SerdReader* const reader = serd_reader_new( + world, syntax, opts.input.flags, env, sink, opts.stack_size); + + SerdStatus st = serd_reader_start(reader, in); + if (!st) { + st = serd_reader_read_document(reader); + } + + serd_reader_free(reader); + return st; +} + +SerdStatus +serd_read_inputs(SerdWorld* const world, + const SerdCommonOptions opts, + SerdEnv* const env, + const intptr_t n_inputs, + char* const* const inputs, + const SerdSink* const sink) +{ + SerdStatus st = SERD_SUCCESS; + + for (intptr_t i = 0; !st && i < n_inputs; ++i) { + // Use the filename as the base URI if possible if user didn't override it + const char* const in_path = inputs[i]; + if (!opts.base_uri[0] && strcmp(in_path, "-")) { + serd_set_base_uri_from_path(env, in_path); + } + + // Open the input stream + SerdByteSource* const in = serd_open_input(in_path, opts.block_size); + if (!in) { + return SERD_ERR_BAD_ARG; + } + + // Read the entire file + st = serd_read_source( + world, + opts, + env, + serd_choose_syntax(world, opts.input, in_path, SERD_TRIG), + in, + sink); + + serd_byte_source_free(in); + } + + return st; +} diff --git a/tools/console.h b/tools/console.h index 16f6fd14..cb227e8e 100644 --- a/tools/console.h +++ b/tools/console.h @@ -16,28 +16,108 @@ #include "serd/serd.h" +#include <stdbool.h> +#include <stdint.h> #include <stdio.h> +// Iterator over command-line options with support for BSD-style flag merging +typedef struct { + char* const* argv; ///< Complete argument vector (from main) + int argc; ///< Total number of arguments (from main) + int a; ///< Argument index (index into argv) + int f; ///< Flag index (offset in argv[arg]) +} OptionIter; + +// Options for the input or output syntax +typedef struct { + SerdSyntax syntax; ///< User-specified syntax, or empty + uint32_t flags; ///< SerdReaderFlags or SerdWriterFlags + bool overridden; ///< True if syntax was explicitly given +} 
SerdSyntaxOptions; + +// Options common to all command-line tools +typedef struct { + const char* base_uri; + const char* out_filename; + size_t block_size; + size_t stack_size; + SerdSyntaxOptions input; + SerdSyntaxOptions output; +} SerdCommonOptions; + +// Common "global" state of a command-line tool that writes data +typedef struct { + SerdByteSink* out; + SerdWorld* world; + SerdEnv* env; + SerdWriter* writer; +} SerdTool; + +static inline bool +serd_option_iter_is_end(const OptionIter iter) +{ + return iter.a >= iter.argc || iter.argv[iter.a][0] != '-' || + !iter.argv[iter.a][iter.f]; +} + +static inline SerdStatus +serd_option_iter_advance(OptionIter* const iter) +{ + if (!iter->argv[iter->a][++iter->f]) { + ++iter->a; + iter->f = 1; + } + + return SERD_SUCCESS; +} + +SerdStatus +serd_tool_setup(SerdTool* tool, const char* program, SerdCommonOptions options); + +SerdStatus +serd_tool_cleanup(SerdTool tool); + void serd_set_stream_utf8_mode(FILE* stream); -int +SerdStatus serd_print_version(const char* program); SerdStatus +serd_get_argument(OptionIter* iter, const char** argument); + +SerdStatus +serd_get_size_argument(OptionIter* iter, size_t* argument); + +SerdStatus serd_set_input_option(SerdStringView name, SerdSyntax* syntax, SerdReaderFlags* flags); SerdStatus +serd_parse_input_argument(OptionIter* iter, SerdSyntaxOptions* options); + +SerdStatus serd_set_output_option(SerdStringView name, SerdSyntax* syntax, SerdWriterFlags* flags); +SerdStatus +serd_parse_output_argument(OptionIter* iter, SerdSyntaxOptions* options); + +SerdStatus +serd_parse_common_option(OptionIter* iter, SerdCommonOptions* opts); + +SerdEnv* +serd_create_env(const char* program, + const char* base_string, + const char* out_filename); + SerdSyntax -serd_choose_input_syntax(SerdWorld* world, - SerdSyntax requested, - const char* filename); +serd_choose_syntax(SerdWorld* world, + SerdSyntaxOptions options, + const char* filename, + SerdSyntax fallback); SerdByteSource* 
serd_open_input(const char* filename, size_t block_size); @@ -47,3 +127,19 @@ serd_open_output(const char* filename, size_t block_size); SerdStatus serd_set_base_uri_from_path(SerdEnv* env, const char* path); + +SerdStatus +serd_read_source(SerdWorld* world, + SerdCommonOptions opts, + SerdEnv* env, + SerdSyntax syntax, + SerdByteSource* in, + const SerdSink* sink); + +SerdStatus +serd_read_inputs(SerdWorld* world, + SerdCommonOptions opts, + SerdEnv* env, + intptr_t n_inputs, + char* const* inputs, + const SerdSink* sink); diff --git a/tools/meson.build b/tools/meson.build index 3054364a..d4964784 100644 --- a/tools/meson.build +++ b/tools/meson.build @@ -5,9 +5,29 @@ if get_option('static') tool_link_args += ['-static'] endif -serdi = executable('serdi', - ['serdi.c', 'console.c'], - c_args: tool_c_args, - link_args: tool_link_args, - install: true, - dependencies: serd_dep) +tools = [ + 'filter', + 'pipe', + 'sort', +] + +serd_filter = executable('serd-filter', + ['serd-filter.c', 'console.c'], + c_args: tool_c_args, + link_args: tool_link_args, + install: true, + dependencies: serd_dep) + +serd_pipe = executable('serd-pipe', + ['serd-pipe.c', 'console.c'], + c_args: tool_c_args, + link_args: tool_link_args, + install: true, + dependencies: serd_dep) + +serd_sort = executable('serd-sort', + ['serd-sort.c', 'console.c'], + c_args: tool_c_args, + link_args: tool_link_args, + install: true, + dependencies: serd_dep) diff --git a/tools/serd-filter.c b/tools/serd-filter.c new file mode 100644 index 00000000..789d3149 --- /dev/null +++ b/tools/serd-filter.c @@ -0,0 +1,287 @@ +/* + Copyright 2011-2021 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. 
+ + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "console.h" + +#include "serd/serd.h" + +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +/* Application (after parsing command-line arguments) */ + +// All options +typedef struct { + SerdCommonOptions common; + const char* pattern; + const char* pattern_file; + char* const* inputs; + intptr_t n_inputs; + bool invert; +} Options; + +// A single statement pattern +typedef struct { + SerdNode* s; + SerdNode* p; + SerdNode* o; + SerdNode* g; +} FilterPattern; + +// Handler for events read from a pattern +static SerdStatus +on_pattern_event(void* const handle, const SerdEvent* const event) +{ + if (event->type == SERD_STATEMENT) { + FilterPattern* const pat = (FilterPattern*)handle; + if (pat->s) { + return SERD_ERR_INVALID; + } + + const SerdStatement* const statement = event->statement.statement; + pat->s = serd_node_copy(serd_statement_subject(statement)); + pat->p = serd_node_copy(serd_statement_predicate(statement)); + pat->o = serd_node_copy(serd_statement_object(statement)); + pat->g = serd_node_copy(serd_statement_graph(statement)); + } + + return SERD_SUCCESS; +} + +// Parse a pattern from some input and return a new filter for it +static SerdSink* +parse_pattern(SerdWorld* const world, + const SerdSink* const sink, + SerdByteSource* const byte_source, + const bool inclusive) +{ + SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING()); + FilterPattern pat = {NULL, NULL, NULL, NULL}; + 
SerdSink* in_sink = serd_sink_new(&pat, on_pattern_event, NULL); + SerdReader* reader = serd_reader_new( + world, SERD_NQUADS, SERD_READ_VARIABLES, env, in_sink, 4096); + + SerdStatus st = serd_reader_start(reader, byte_source); + if (!st) { + st = serd_reader_read_document(reader); + } + + serd_reader_free(reader); + serd_env_free(env); + serd_sink_free(in_sink); + + if (st) { + serd_logf(world, + SERD_LOG_LEVEL_ERROR, + "failed to parse pattern (%s)", + serd_strerror(st)); + return NULL; + } + + SerdSink* filter = + serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g, inclusive); + + serd_node_free(pat.s); + serd_node_free(pat.p); + serd_node_free(pat.o); + serd_node_free(pat.g); + return filter; +} + +SERD_LOG_FUNC(2, 3) +static SerdStatus +log_error(SerdWorld* const world, const char* const fmt, ...) +{ + va_list args; + va_start(args, fmt); + + const SerdLogField file = {"SERD_FILE", "serd-filter"}; + const SerdStatus st = + serd_vxlogf(world, SERD_LOG_LEVEL_ERROR, 1, &file, fmt, args); + + va_end(args); + return st; +} + +// Run the tool using the given options +static SerdStatus +run(Options opts) +{ + SerdTool app = {NULL, NULL, NULL, NULL}; + + // Set up the writing environment + SerdStatus st = SERD_SUCCESS; + if ((st = serd_tool_setup(&app, "serd-filter", opts.common))) { + serd_tool_cleanup(app); + return st; + } + + const SerdSink* const target = serd_writer_sink(app.writer); + + // Open the pattern input (either a string or filename) + SerdByteSource* const pattern = + opts.pattern ? serd_byte_source_new_string(opts.pattern, NULL) + : opts.pattern_file + ? 
serd_byte_source_new_filename(opts.pattern_file, opts.common.block_size) + : NULL; + if (!pattern) { + log_error(app.world, "failed to open pattern"); + return SERD_ERR_UNKNOWN; + } + + // Set up the output pipeline: filter -> writer + SerdSink* const filter = + parse_pattern(app.world, target, pattern, !opts.invert); + if (!filter) { + log_error(app.world, "failed to set up filter"); + return SERD_ERR_UNKNOWN; + } + + // Read all the inputs, which drives the writer to emit the output + if (!(st = serd_read_inputs(app.world, + opts.common, + app.env, + opts.n_inputs, + opts.inputs, + filter))) { + st = serd_writer_finish(app.writer); + } + + if (st) { + log_error(app.world, "failed to read input (%s)", serd_strerror(st)); + } + + serd_sink_free(filter); + serd_byte_source_free(pattern); + + const SerdStatus cst = serd_tool_cleanup(app); + return st ? st : cst; +} + +/* Command-line interface (before setting up serd) */ + +static int +print_usage(const char* const name, const bool error) +{ + static const char* const description = + "Search for statements matching PATTERN in each INPUT.\n" + "INPUT can be a local filename, or \"-\" to read from standard input.\n\n" + " -B BASE_URI Base URI or path for resolving relative references.\n" + " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n" + " or option (lax/variables/relative/global/generated).\n" + " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n" + " or option (ascii/expanded/verbatim/terse/lax).\n" + " -V Display version information and exit.\n" + " -f PATTERN_FILE Read pattern from PATTERN_FILE instead.\n" + " -h Display this help and exit.\n" + " -k BYTES Parser stack size.\n" + " -o FILENAME Write output to FILENAME instead of stdout.\n" + " -v Invert filter to select non-matching statements.\n"; + + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... PATTERN INPUT...\n", name); + fprintf(os, " %s [OPTION]... 
-f PATTERN_FILE INPUT...\n", name); + fprintf(os, "\n%s", description); + return error ? EXIT_FAILURE : EXIT_SUCCESS; +} + +// Parse the option pointed to by `iter`, and advance it to the next one +static SerdStatus +parse_option(OptionIter* const iter, Options* const opts) +{ +#define ARG_ERRORF(fmt, ...) \ + fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__) + + SerdStatus st = serd_parse_common_option(iter, &opts->common); + if (st != SERD_FAILURE) { + return st; + } + + const char opt = iter->argv[iter->a][iter->f]; + + switch (opt) { + case 'V': + return serd_print_version("serd-filter"); + + case 'f': + return serd_get_argument(iter, &opts->pattern_file); + + case 'h': + print_usage(iter->argv[0], false); + return SERD_FAILURE; + + case 'v': + opts->invert = true; + return serd_option_iter_advance(iter); + + default: + break; + } + + ARG_ERRORF("invalid option -- '%c'\n", opt); + return SERD_ERR_BAD_ARG; + +#undef ARG_ERRORF +} + +int +main(int argc, char** argv) +{ + Options opts = {{"", + NULL, + 4096u, + 1048576u, + {SERD_SYNTAX_EMPTY, 0u, false}, + {SERD_NQUADS, 0u, false}}, + NULL, + NULL, + NULL, + 0u, + false}; + + // Parse all command line options (which must precede inputs) + SerdStatus st = SERD_SUCCESS; + OptionIter iter = {argv, argc, 1, 1}; + while (!serd_option_iter_is_end(iter)) { + if ((st = parse_option(&iter, &opts))) { + return (st == SERD_FAILURE) ? 
0 : print_usage(argv[0], true); + } + } + + // If -f isn't used, then the first positional argument is the pattern + if (!opts.pattern_file) { + opts.pattern = argv[iter.a++]; + } + + // Every argument past that is an input + opts.inputs = argv + iter.a; + opts.n_inputs = argc - iter.a; + if (opts.n_inputs == 0) { + fprintf(stderr, "%s: missing input\n", argv[0]); + return print_usage(argv[0], true); + } + + // Don't add prefixes to blank node labels if there is only one input + if (opts.n_inputs == 1) { + opts.common.input.flags |= SERD_READ_GLOBAL; + } + + return run(opts) > SERD_FAILURE; +} diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c new file mode 100644 index 00000000..75b3e0d4 --- /dev/null +++ b/tools/serd-pipe.c @@ -0,0 +1,209 @@ +/* + Copyright 2011-2021 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ + +#include "console.h" + +#include "serd/serd.h" + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> + +/* Application (after parsing command-line arguments) */ + +// All options +typedef struct { + SerdCommonOptions common; + const char* root_uri; + const char* input_string; + char* const* inputs; + intptr_t n_inputs; + bool canonical; + bool quiet; +} Options; + +// Run the tool using the given options +static SerdStatus +run(const Options opts) +{ + SerdTool app = {NULL, NULL, NULL, NULL}; + + // Set up the writing environment + SerdStatus st = SERD_SUCCESS; + if ((st = serd_tool_setup(&app, "serd-pipe", opts.common))) { + serd_tool_cleanup(app); + return st; + } + + if (opts.quiet) { + serd_set_log_func(app.world, serd_quiet_log_func, NULL); + } + + serd_writer_set_root_uri(app.writer, SERD_STRING(opts.root_uri)); + + // Set up the output pipeline: [canon] -> writer + const SerdSink* const target = serd_writer_sink(app.writer); + const SerdSink* sink = target; + SerdSink* canon = NULL; + if (opts.canonical) { + canon = serd_canon_new(app.world, target, opts.common.input.flags); + sink = canon; + } + + if (opts.input_string) { + SerdByteSource* const in = + serd_byte_source_new_string(opts.input_string, NULL); + + st = serd_read_source( + app.world, + opts.common, + app.env, + serd_choose_syntax(app.world, opts.common.input, NULL, SERD_TRIG), + in, + sink); + + serd_byte_source_free(in); + } + + // Read all the inputs, which drives the writer to emit the output + if (st || + (st = serd_read_inputs( + app.world, opts.common, app.env, opts.n_inputs, opts.inputs, sink)) || + (st = serd_writer_finish(app.writer))) { + serd_tool_cleanup(app); + return st; + } + + return serd_tool_cleanup(app); +} + +/* Command-line interface (before setting up serd) */ + +static int +print_usage(const char* const name, const bool error) +{ + static const char* const description = + "Read and write RDF data.\n" + "INPUT can be a local filename, or \"-\" to read from 
standard input.\n\n" + " -B BASE_URI Base URI or path for resolving relative references.\n" + " -C Convert literals to canonical form.\n" + " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n" + " or option (lax/variables/relative/global/generated).\n" + " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n" + " or option (ascii/expanded/verbatim/terse/lax).\n" + " -R ROOT_URI Keep relative URIs within ROOT_URI.\n" + " -V Display version information and exit.\n" + " -b BYTES I/O block size.\n" + " -h Display this help and exit.\n" + " -k BYTES Parser stack size.\n" + " -o FILENAME Write output to FILENAME instead of stdout.\n" + " -q Suppress warning and error output.\n" + " -s STRING Parse STRING as input.\n"; + + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); + fprintf(os, "%s", description); + return error; +} + +// Parse the option pointed to by `iter`, and advance it to the next one +static SerdStatus +parse_option(OptionIter* const iter, Options* const opts) +{ +#define ARG_ERRORF(fmt, ...) 
\ + fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__) + + SerdStatus st = serd_parse_common_option(iter, &opts->common); + if (st != SERD_FAILURE) { + return st; + } + + const char opt = iter->argv[iter->a][iter->f]; + switch (opt) { + case 'C': + opts->canonical = true; + return serd_option_iter_advance(iter); + + case 'R': + return serd_get_argument(iter, &opts->root_uri); + + case 'V': + return serd_print_version("serd-pipe"); + + case 'h': + print_usage(iter->argv[0], false); + return SERD_FAILURE; + + case 'q': + opts->quiet = true; + return serd_option_iter_advance(iter); + + case 's': + return serd_get_argument(iter, &opts->input_string); + + default: + break; + } + + ARG_ERRORF("invalid option -- '%c'\n", opt); + return SERD_ERR_BAD_ARG; + +#undef ARG_ERRORF +} + +int +main(const int argc, char* const* const argv) +{ + char* const default_input[] = {"-"}; + + Options opts = {{"", + NULL, + 4096u, + 1048576u, + {SERD_SYNTAX_EMPTY, 0u, false}, + {SERD_SYNTAX_EMPTY, 0u, false}}, + "", + NULL, + NULL, + 0u, + false, + false}; + + // Parse all command line options (which must precede inputs) + SerdStatus st = SERD_SUCCESS; + OptionIter iter = {argv, argc, 1, 1}; + while (!serd_option_iter_is_end(iter)) { + if ((st = parse_option(&iter, &opts))) { + return (st == SERD_FAILURE) ? 
0 : print_usage(argv[0], true); + } + } + + // Every argument past the last option is an input + opts.inputs = argv + iter.a; + opts.n_inputs = argc - iter.a; + if (opts.n_inputs + (bool)opts.input_string == 0) { + opts.n_inputs = 1; + opts.inputs = default_input; + } + + // Don't add prefixes to blank node labels if there is only one input + if (opts.n_inputs + (bool)opts.input_string == 1) { + opts.common.input.flags |= SERD_READ_GLOBAL; + } + + return run(opts) > SERD_FAILURE; +} diff --git a/tools/serd-sort.c b/tools/serd-sort.c new file mode 100644 index 00000000..deb79cb5 --- /dev/null +++ b/tools/serd-sort.c @@ -0,0 +1,274 @@ +/* + Copyright 2011-2021 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/
+
+#include "console.h"
+
+#include "serd/serd.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Application (after parsing command-line arguments) */
+
+// All options
+typedef struct {
+  SerdCommonOptions  common;       // Options shared by all the serd tools
+  const char*        root_uri;     // Argument of -R
+  const char*        input_string; // Argument of -s
+  const char*        collation;    // Argument of -c, or NULL if not given
+  char* const*       inputs;       // First input argument in argv
+  intptr_t           n_inputs;     // Number of input arguments
+  SerdStatementOrder order;        // Order parsed from `collation`
+  SerdDescribeFlags  flags;        // Flags passed to serd_describe_range()
+} Options;
+
+/*
+  NOTE(review): `root_uri` and `input_string` are stored by parse_option()
+  but nothing in this file reads them afterwards, except that main() counts
+  `input_string` as an input.  As written, "-s STRING" with no file inputs
+  passes the "missing input" check and then reads nothing.  Either wire these
+  up or drop the options.  Likewise, no option in this file ever sets
+  `flags`, so it is always the 0u that main() initializes it to.
+*/
+
+/**
+   Return whether the input may contain named graphs.
+
+   If an input syntax was given explicitly, only that syntax is considered.
+   Otherwise, the syntax of every input is guessed from its name, and the
+   result is true if any of them supports graphs.
+*/
+static bool
+input_has_graphs(const Options opts)
+{
+  if (opts.common.input.syntax) {
+    return serd_syntax_has_graphs(opts.common.input.syntax);
+  }
+
+  for (intptr_t i = 0; i < opts.n_inputs; ++i) {
+    if (serd_syntax_has_graphs(serd_guess_syntax(opts.inputs[i]))) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/**
+   Write everything in `model` to the writer of `app`.
+
+   With a collation, statements are written one by one in that order.
+   Without one, the whole model is passed to serd_describe_range().  In both
+   cases the prefixes in the environment are written first.
+*/
+static SerdStatus
+write_model(const Options* const opts,
+            const SerdTool* const app,
+            SerdModel* const     model)
+{
+  const SerdSink* const target = serd_writer_sink(app->writer);
+  SerdStatus            st     = SERD_SUCCESS;
+
+  if (opts->collation) {
+    SerdCursor* const cursor = serd_model_begin_ordered(model, opts->order);
+
+    serd_env_write_prefixes(app->env, target);
+
+    // Stop at the first statement that fails to write
+    for (const SerdStatement* statement = NULL;
+         !st && (statement = serd_cursor_get(cursor));
+         serd_cursor_advance(cursor)) {
+      st = serd_sink_write_statement(target, 0u, statement);
+    }
+
+    serd_cursor_free(cursor);
+  } else {
+    SerdCursor* const cursor = serd_model_begin(model);
+
+    serd_env_write_prefixes(app->env, target);
+
+    st = serd_describe_range(cursor, target, opts->flags);
+
+    serd_cursor_free(cursor);
+  }
+
+  return st;
+}
+
+/**
+   Run the tool using the given options.
+
+   Loads all inputs into a model, writes the model to the output, then
+   releases everything that was created here.
+
+   @return The first error encountered, otherwise the status of the final
+   cleanup (which reports output stream errors).
+*/
+static SerdStatus
+run(const Options opts)
+{
+  SerdTool app = {NULL, NULL, NULL, NULL};
+
+  // Set up the writing environment
+  SerdStatus st = serd_tool_setup(&app, "serd-sort", opts.common);
+  if (st) {
+    serd_tool_cleanup(app);
+    return st;
+  }
+
+  // Determine the default order to store statements in the model
+  const bool               with_graphs   = input_has_graphs(opts);
+  const SerdStatementOrder default_order = opts.collation ? opts.order
+                                           : with_graphs  ? SERD_ORDER_GSPO
+                                                          : SERD_ORDER_SPO;
+
+  const SerdModelFlags flags =
+    (SerdModelFlags)(with_graphs * SERD_STORE_GRAPHS);
+
+  // Create the model and the sink that inserts into it, checking both
+  SerdModel* const model    = serd_model_new(app.world, default_order, flags);
+  SerdSink* const  inserter = model ? serd_inserter_new(model, NULL) : NULL;
+  if (!inserter) {
+    fprintf(stderr, "serd-sort: failed to create model\n");
+    st = SERD_ERR_INTERNAL;
+  }
+
+  if (!st && !opts.collation) {
+    // If we are pretty-printing, we need an O** index
+    serd_model_add_index(model, SERD_ORDER_OPS);
+
+    if (with_graphs) {
+      // If we have graphs we still need the SPO index for finding subjects
+      serd_model_add_index(model, SERD_ORDER_SPO);
+    }
+  }
+
+  // Read all the inputs into an inserter to load the model
+  if (!st) {
+    st = serd_read_inputs(
+      app.world, opts.common, app.env, opts.n_inputs, opts.inputs, inserter);
+  }
+
+  // Write the model to the output
+  if (!st) {
+    st = write_model(&opts, &app, model);
+  }
+
+  if (!st) {
+    st = serd_writer_finish(app.writer);
+  }
+
+  /* Release the inserter and model on every path (both were previously
+     leaked), before the world they were created with is freed by the tool
+     cleanup.  The serdi.c code removed by this commit called both of these
+     functions on pointers that could be NULL, so that is assumed safe. */
+  serd_sink_free(inserter);
+  serd_model_free(model);
+
+  const SerdStatus cst = serd_tool_cleanup(app);
+  return st ? st : cst;
+}
+
+/* Command-line interface (before setting up serd) */
+
+/**
+   Parse a collation string like "SPO" or "GOPS" into a statement order.
+
+   The index of the matching string is used directly as the enum value, so
+   this table must be in the same order as SerdStatementOrder.
+   NOTE(review): that enum is declared elsewhere, confirm the order matches.
+
+   @return SERD_SUCCESS and sets `order`, or SERD_ERR_BAD_ARG (leaving
+   `order` untouched) if `string` is not a known order.
+*/
+static SerdStatus
+parse_statement_order(const char* const string, SerdStatementOrder* const order)
+{
+  static const char* const strings[] = {"SPO",
+                                        "SOP",
+                                        "OPS",
+                                        "OSP",
+                                        "PSO",
+                                        "POS",
+                                        "GSPO",
+                                        "GSOP",
+                                        "GOPS",
+                                        "GOSP",
+                                        "GPSO",
+                                        "GPOS",
+                                        NULL};
+
+  for (unsigned i = 0; strings[i]; ++i) {
+    if (!strcmp(string, strings[i])) {
+      *order = (SerdStatementOrder)i;
+      return SERD_SUCCESS;
+    }
+  }
+
+  return SERD_ERR_BAD_ARG;
+}
+
+/**
+   Print the usage message.
+
+   The message goes to stderr (preceded by a blank line) if `error` is true,
+   otherwise to stdout.
+
+   @return 1 if `error` is true, otherwise 0, for use as an exit status.
+*/
+static int
+print_usage(const char* const name, const bool error)
+{
+  /* NOTE(review): the column padding inside these literals looks like it was
+     collapsed to single spaces by the page this was taken from, so check
+     them against the repository before relying on the layout.  The -R and
+     -s options accepted by parse_option() are not described here. */
+  static const char* const description =
+    "Reorder RDF data by loading everything into a model then writing it.\n"
+    "INPUT can be a local filename, or \"-\" to read from standard input.\n\n"
+    " -B BASE_URI Base URI or path for resolving relative references.\n"
+    " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n"
+    " or option (lax/variables/relative/global/generated).\n"
+    " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n"
+    " or option (ascii/expanded/verbatim/terse/lax).\n"
+    " -V Display version information and exit.\n"
+    " -b BYTES I/O block size.\n"
+    " -c COLLATION An optional \"G\" then the letters \"SPO\" in any order.\n"
+    " -h Display this help and exit.\n"
+    " -k BYTES Parser stack size.\n"
+    " -o FILENAME Write output to FILENAME instead of stdout.\n";
+
+  FILE* const os = error ? stderr : stdout;
+  fprintf(os, "%s", error ? "\n" : "");
+  fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
+  fprintf(os, "%s", description);
+  return error;
+}
+
+/**
+   Parse the option pointed to by `iter`, and advance it to the next one.
+
+   The option is first offered to serd_parse_common_option().  Any result
+   other than SERD_FAILURE is returned as-is; SERD_FAILURE falls through to
+   the options specific to this tool (presumably it means "not a common
+   option", confirm against console.h).
+
+   @return SERD_SUCCESS to continue parsing, SERD_FAILURE after printing help
+   (the caller treats this as a successful exit), or an error.
+*/
+static SerdStatus
+parse_option(OptionIter* const iter, Options* const opts)
+{
+#define ARG_ERRORF(fmt, ...) \
+  fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__)
+
+  SerdStatus st = serd_parse_common_option(iter, &opts->common);
+  if (st != SERD_FAILURE) {
+    return st;
+  }
+
+  const char opt = iter->argv[iter->a][iter->f];
+  switch (opt) {
+  case 'R':
+    return serd_get_argument(iter, &opts->root_uri);
+
+  case 'V':
+    return serd_print_version("serd-sort");
+
+  case 'c':
+    // Only try to parse the collation if an argument was actually given
+    if (!(st = serd_get_argument(iter, &opts->collation))) {
+      if ((st = parse_statement_order(opts->collation, &opts->order))) {
+        ARG_ERRORF("unknown collation \"%s\"\n", opts->collation);
+        return st;
+      }
+    }
+    return st;
+
+  case 'h':
+    print_usage(iter->argv[0], false);
+    return SERD_FAILURE;
+
+  case 's':
+    return serd_get_argument(iter, &opts->input_string);
+
+  default:
+    break;
+  }
+
+  ARG_ERRORF("invalid option -- '%c'\n", opt);
+  return SERD_ERR_BAD_ARG;
+
+#undef ARG_ERRORF
+}
+
+/**
+   Parse the command line, then run the tool.
+
+   @return 0 on success (including after printing help), or 1 on error.
+*/
+int
+main(const int argc, char* const* const argv)
+{
+  /* Defaults.  The nested initializer fills SerdCommonOptions positionally,
+     so it depends on the field order declared in console.h (not visible
+     here): 4096u and 1048576u are presumably the default I/O block size and
+     parser stack size that -b and -k override. */
+  Options opts = {{"",
+                   NULL,
+                   4096u,
+                   1048576u,
+                   {SERD_SYNTAX_EMPTY, 0u, false},
+                   {SERD_SYNTAX_EMPTY, 0u, false}},
+                  "",
+                  NULL,
+                  NULL,
+                  NULL,
+                  0,
+                  SERD_ORDER_SPO,
+                  0u};
+
+  // Parse all command line options (which must precede inputs)
+  SerdStatus st   = SERD_SUCCESS;
+  OptionIter iter = {argv, argc, 1, 1};
+  while (!serd_option_iter_is_end(iter)) {
+    if ((st = parse_option(&iter, &opts))) {
+      // SERD_FAILURE means help was printed on request, which is not an error
+      return (st == SERD_FAILURE) ? 0 : print_usage(argv[0], true);
+    }
+  }
+
+  // Every argument past the last option is an input
+  opts.inputs   = argv + iter.a;
+  opts.n_inputs = argc - iter.a;
+  if (opts.n_inputs + (bool)opts.input_string == 0) {
+    fprintf(stderr, "%s: missing input\n", argv[0]);
+    return print_usage(argv[0], true);
+  }
+
+  // Don't add prefixes to blank node labels if there is only one input
+  if (opts.n_inputs + (bool)opts.input_string == 1) {
+    opts.common.input.flags |= SERD_READ_GLOBAL;
+  }
+
+  // Only statuses above SERD_FAILURE are reported as a failed exit
+  return run(opts) > SERD_FAILURE;
+}
diff --git a/tools/serdi.c b/tools/serdi.c
deleted file mode 100644
index b1542727..00000000
--- a/tools/serdi.c
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
-  Copyright 2011-2021 David Robillard <d@drobilla.net>
-
-  Permission to use, copy, modify, and/or distribute this software for any
-  purpose with or without fee is hereby granted, provided that the above
-  copyright notice and this permission notice appear in all copies.
-
-  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*/
-
-#include "console.h"
-
-#include "serd/serd.h"
-
-#include <errno.h>
-#include <limits.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg)
-#define SERDI_ERRORF(fmt, ...)
fprintf(stderr, "serdi: " fmt, __VA_ARGS__) - -typedef struct { - SerdNode* s; - SerdNode* p; - SerdNode* o; - SerdNode* g; -} FilterPattern; - -static int -print_usage(const char* const name, const bool error) -{ - static const char* const description = - "Read and write RDF syntax.\n" - "Use - for INPUT to read from standard input.\n\n" - " -C Convert literals to canonical form.\n" - " -F PATTERN Filter out statements that match PATTERN.\n" - " -G PATTERN Only include statements matching PATTERN.\n" - " -I BASE_URI Input base URI.\n" - " -b BYTES I/O block size.\n" - " -f Fast and loose mode (possibly ugly output).\n" - " -h Display this help and exit.\n" - " -i SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n" - " or flag (lax/variables/verbatim).\n" - " -k BYTES Parser stack size.\n" - " -m Build a model in memory before writing.\n" - " -o SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n" - " or flag (ascii/expanded/verbatim/terse/lax).\n" - " -q Suppress all output except data.\n" - " -r ROOT_URI Keep relative URIs within ROOT_URI.\n" - " -s STRING Parse STRING as input.\n" - " -v Display version information and exit.\n" - " -w FILENAME Write output to FILENAME instead of stdout.\n"; - - FILE* const os = error ? stderr : stdout; - fprintf(os, "%s", error ? "\n" : ""); - fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); - fprintf(os, "%s", description); - return error ? 
1 : 0; -} - -static int -missing_arg(const char* const name, const char opt) -{ - SERDI_ERRORF("option requires an argument -- '%c'\n", opt); - return print_usage(name, true); -} - -static SerdStatus -on_filter_event(void* const handle, const SerdEvent* const event) -{ - if (event->type == SERD_STATEMENT) { - FilterPattern* const pat = (FilterPattern*)handle; - if (pat->s) { - return SERD_ERR_INVALID; - } - - const SerdStatement* const statement = event->statement.statement; - pat->s = serd_node_copy(serd_statement_subject(statement)); - pat->p = serd_node_copy(serd_statement_predicate(statement)); - pat->o = serd_node_copy(serd_statement_object(statement)); - pat->g = serd_node_copy(serd_statement_graph(statement)); - } - - return SERD_SUCCESS; -} - -static SerdSink* -parse_filter(SerdWorld* const world, - const SerdSink* const sink, - const char* const str, - const bool inclusive) -{ - SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING()); - FilterPattern pat = {NULL, NULL, NULL, NULL}; - SerdSink* in_sink = serd_sink_new(&pat, on_filter_event, NULL); - SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL); - SerdReader* reader = serd_reader_new( - world, SERD_NQUADS, SERD_READ_VARIABLES, env, in_sink, 4096); - - SerdStatus st = serd_reader_start(reader, byte_source); - if (!st) { - st = serd_reader_read_document(reader); - } - - serd_reader_free(reader); - serd_env_free(env); - serd_byte_source_free(byte_source); - serd_sink_free(in_sink); - - if (st) { - return NULL; - } - - SerdSink* filter = - serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g, inclusive); - - serd_node_free(pat.s); - serd_node_free(pat.p); - serd_node_free(pat.o); - serd_node_free(pat.g); - return filter; -} - -static SerdStatus -read_file(SerdWorld* const world, - const SerdSyntax syntax, - const SerdReaderFlags flags, - SerdEnv* const env, - const SerdSink* const sink, - const size_t stack_size, - const char* const filename, - const size_t block_size) -{ - SerdByteSource* 
byte_source = serd_open_input(filename, block_size); - - if (!byte_source) { - SERDI_ERRORF( - "failed to open input file `%s' (%s)\n", filename, strerror(errno)); - - return SERD_ERR_UNKNOWN; - } - - SerdReader* reader = - serd_reader_new(world, syntax, flags, env, sink, stack_size); - - SerdStatus st = serd_reader_start(reader, byte_source); - - st = st ? st : serd_reader_read_document(reader); - - serd_reader_free(reader); - serd_byte_source_free(byte_source); - - return st; -} - -int -main(int argc, char** argv) -{ - const char* const prog = argv[0]; - - SerdNode* base = NULL; - SerdSyntax input_syntax = SERD_SYNTAX_EMPTY; - SerdSyntax output_syntax = SERD_SYNTAX_EMPTY; - SerdReaderFlags reader_flags = 0; - SerdWriterFlags writer_flags = 0; - bool no_inline = false; - bool osyntax_set = false; - bool use_model = false; - bool canonical = false; - bool quiet = false; - size_t block_size = 4096u; - size_t stack_size = 4194304; - const char* input_string = NULL; - const char* in_pattern = NULL; - const char* out_pattern = NULL; - const char* root_uri = NULL; - const char* out_filename = NULL; - int a = 1; - for (; a < argc && argv[a][0] == '-'; ++a) { - if (argv[a][1] == '\0') { - break; - } - - for (int o = 1; argv[a][o]; ++o) { - const char opt = argv[a][o]; - - if (opt == 'C') { - canonical = true; - } else if (opt == 'f') { - no_inline = true; - writer_flags |= (SERD_WRITE_EXPANDED | SERD_WRITE_VERBATIM); - } else if (opt == 'h') { - return print_usage(prog, false); - } else if (argv[a][1] == 'm') { - use_model = true; - } else if (opt == 'q') { - quiet = true; - } else if (opt == 'v') { - return serd_print_version(argv[0]); - } else if (argv[a][1] == 'F') { - if (++a == argc) { - return missing_arg(argv[0], 'F'); - } - - out_pattern = argv[a]; - break; - } else if (argv[a][1] == 'G') { - if (++a == argc) { - return missing_arg(argv[0], 'g'); - } - - in_pattern = argv[a]; - break; - } else if (argv[a][1] == 'I') { - if (++a == argc) { - return 
missing_arg(prog, 'I'); - } - - base = serd_new_uri(SERD_STRING(argv[a])); - break; - } else if (opt == 'b') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'b'); - } - - char* endptr = NULL; - const long size = strtol(argv[a], &endptr, 10); - if (size < 1 || size == LONG_MAX || *endptr != '\0') { - SERDI_ERRORF("invalid block size `%s'\n", argv[a]); - return 1; - } - block_size = (size_t)size; - break; - } else if (opt == 'i') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'i'); - } - - if (serd_set_input_option( - SERD_STRING(argv[a]), &input_syntax, &reader_flags)) { - return print_usage(argv[0], true); - } - break; - } else if (opt == 'k') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'k'); - } - - char* endptr = NULL; - const long size = strtol(argv[a], &endptr, 10); - if (size <= 0 || size == LONG_MAX || *endptr != '\0') { - SERDI_ERRORF("invalid stack size `%s'\n", argv[a]); - return 1; - } - stack_size = (size_t)size; - break; - } else if (opt == 'o') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'o'); - } - - if (serd_set_output_option( - SERD_STRING(argv[a]), &output_syntax, &writer_flags)) { - return print_usage(argv[0], true); - } - - osyntax_set = - output_syntax != SERD_SYNTAX_EMPTY || !strcmp(argv[a], "empty"); - - break; - } else if (opt == 'r') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'r'); - } - - root_uri = argv[a]; - break; - } else if (opt == 's') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 's'); - } - - input_string = argv[a]; - break; - } else if (opt == 'w') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(argv[0], 'w'); - } - - out_filename = argv[a]; - break; - } else { - SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); - return print_usage(prog, true); - } - } - } - - if (in_pattern && out_pattern) { - SERDI_ERROR("only one of -F and -G can be given at once\n"); - return 1; - } - - if 
(a == argc && !input_string) { - SERDI_ERROR("missing input\n"); - return print_usage(prog, true); - } - - char* const* const inputs = argv + a; - const int n_inputs = argc - a; - - bool input_has_graphs = serd_syntax_has_graphs(input_syntax); - for (int i = a; i < argc; ++i) { - if (serd_syntax_has_graphs(serd_guess_syntax(argv[i]))) { - input_has_graphs = true; - break; - } - } - - if (!output_syntax && !osyntax_set) { - output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES; - } - - if (!base && n_inputs == 1 && - (output_syntax == SERD_NQUADS || output_syntax == SERD_NTRIPLES)) { - // Choose base URI from the single input path - char* const input_path = serd_canonical_path(inputs[0]); - if (!input_path || !(base = serd_new_file_uri(SERD_STRING(input_path), - SERD_EMPTY_STRING()))) { - SERDI_ERRORF("unable to determine base URI from path %s\n", inputs[0]); - } - serd_free(input_path); - } - - SerdWorld* const world = serd_world_new(); - SerdEnv* const env = - serd_env_new(base ? serd_node_string_view(base) : SERD_EMPTY_STRING()); - - serd_set_stream_utf8_mode(stdin); - if (!out_filename) { - serd_set_stream_utf8_mode(stdout); - } - - const SerdDescribeFlags describe_flags = - no_inline ? SERD_NO_INLINE_OBJECTS : 0u; - - SerdByteSink* const byte_sink = serd_open_output(out_filename, block_size); - if (!byte_sink) { - perror("serdi: error opening output file"); - return 1; - } - - SerdWriter* const writer = - serd_writer_new(world, output_syntax, writer_flags, env, byte_sink); - - SerdModel* model = NULL; - SerdSink* inserter = NULL; - const SerdSink* out_sink = NULL; - if (use_model) { - const SerdModelFlags flags = (input_has_graphs ? 
SERD_STORE_GRAPHS : 0u); - - model = serd_model_new(world, SERD_ORDER_SPO, flags); - if (input_has_graphs) { - serd_model_add_index(model, SERD_ORDER_GSPO); - } - - if (!no_inline) { - serd_model_add_index(model, SERD_ORDER_OPS); - if (input_has_graphs) { - serd_model_add_index(model, SERD_ORDER_GOPS); - } - } - - inserter = serd_inserter_new(model, NULL); - out_sink = inserter; - } else { - out_sink = serd_writer_sink(writer); - } - - const SerdSink* sink = out_sink; - - SerdSink* canon = NULL; - if (canonical) { - sink = canon = serd_canon_new(world, out_sink, reader_flags); - } - - SerdSink* filter = NULL; - if (in_pattern) { - if (!(filter = parse_filter(world, sink, in_pattern, true))) { - SERDI_ERROR("error parsing inclusive filter pattern\n"); - return EXIT_FAILURE; - } - - sink = filter; - } else if (out_pattern) { - if (!(filter = parse_filter(world, sink, out_pattern, false))) { - SERDI_ERROR("error parsing exclusive filter pattern\n"); - return EXIT_FAILURE; - } - - sink = filter; - } - - if (quiet) { - serd_set_log_func(world, serd_quiet_log_func, NULL); - } - - if (root_uri) { - serd_writer_set_root_uri(writer, SERD_STRING(root_uri)); - } - - SerdStatus st = SERD_SUCCESS; - if (input_string) { - SerdByteSource* const byte_source = - serd_byte_source_new_string(input_string, NULL); - - SerdReader* const reader = - serd_reader_new(world, - input_syntax ? 
input_syntax : SERD_TRIG, - reader_flags, - env, - sink, - stack_size); - - if (!(st = serd_reader_start(reader, byte_source))) { - st = serd_reader_read_document(reader); - } - - serd_reader_free(reader); - serd_byte_source_free(byte_source); - } - - if (n_inputs == 1) { - reader_flags |= SERD_READ_GLOBAL; - } - - for (int i = 0; !st && i < n_inputs; ++i) { - if (!base && strcmp(inputs[i], "-")) { - if ((st = serd_set_base_uri_from_path(env, inputs[i]))) { - SERDI_ERRORF("failed to set base URI from path %s\n", inputs[i]); - break; - } - } - - if ((st = - read_file(world, - serd_choose_input_syntax(world, input_syntax, inputs[i]), - reader_flags, - env, - sink, - stack_size, - inputs[i], - block_size))) { - break; - } - } - - if (st <= SERD_FAILURE && use_model) { - const SerdSink* writer_sink = serd_writer_sink(writer); - SerdCursor* everything = serd_model_begin_ordered( - model, input_has_graphs ? SERD_ORDER_GSPO : SERD_ORDER_SPO); - - serd_env_write_prefixes(env, writer_sink); - - st = serd_describe_range( - everything, - writer_sink, - describe_flags | - ((output_syntax == SERD_NTRIPLES || output_syntax == SERD_NQUADS) - ? SERD_NO_INLINE_OBJECTS - : 0u)); - - serd_cursor_free(everything); - } - - serd_sink_free(canon); - serd_sink_free(filter); - serd_sink_free(inserter); - serd_model_free(model); - serd_writer_free(writer); - serd_env_free(env); - serd_node_free(base); - serd_world_free(world); - - if (serd_byte_sink_close(byte_sink)) { - perror("serdi: write error"); - st = SERD_ERR_UNKNOWN; - } - - serd_byte_sink_free(byte_sink); - - return (st > SERD_FAILURE) ? 1 : 0; -} |