aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--NEWS1
-rw-r--r--doc/man/serd-pipe.198
-rw-r--r--test/meson.build43
-rwxr-xr-xtest/run_suite.py3
-rw-r--r--test/serd_test_util/__init__.py13
-rwxr-xr-xtest/test_stdin.py2
-rw-r--r--test/trig_no_extension11
-rw-r--r--test/trig_unknown_extension.n311
-rw-r--r--tools/.clang-tidy1
-rw-r--r--tools/console.c331
-rw-r--r--tools/console.h96
-rw-r--r--tools/serd-pipe.c441
12 files changed, 679 insertions, 372 deletions
diff --git a/NEWS b/NEWS
index a222c99b..3fece2d3 100644
--- a/NEWS
+++ b/NEWS
@@ -11,6 +11,7 @@ serd (1.1.1) unstable; urgency=medium
* Make nodes opaque
* Preserve anonymous graph syntax in TriG
* Preserve long or short quoting from input documents
+ * Refine and simplify command-line interface to support new features
* Remove SERD_DISABLE_DEPRECATED and SERD_DEPRECATED_BY
* Remove serd_uri_to_path()
* Remove support for reading Turtle named inline nodes extension
diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1
index b1e01990..4d475985 100644
--- a/doc/man/serd-pipe.1
+++ b/doc/man/serd-pipe.1
@@ -8,15 +8,15 @@
.Nd read and write RDF data
.Sh SYNOPSIS
.Nm serd-pipe
-.Op Fl Cfhqv
+.Op Fl CVhq
.Op Fl B Ar base
.Op Fl I Ar syntax
.Op Fl O Ar syntax
+.Op Fl R Ar root
.Op Fl b Ar bytes
.Op Fl k Ar bytes
-.Op Fl r Ar root
+.Op Fl o Ar filename
.Op Fl s Ar string
-.Op Fl w Ar filename
.Op Ar input ...
.Sh DESCRIPTION
.Nm
@@ -60,12 +60,34 @@ then input is read as TriG and output is written as NQuads
The options are as follows:
.Bl -tag -width 3n
.It Fl B Ar base
-Input base URI.
-Relative URI references in the input will be resolved against this.
-When the input is a file,
-the URI of the file is automatically used as the base URI.
-This option can be used to override that,
-or to provide a base URI for input from stdin or a string.
+Base URI, path, or
+.Cm rebase
+to use the output path.
+This is used to resolve relative URI references in the input.
+.Pp
+If the input is a file,
+its path is used by default,
+so relative paths are written as they are in the input.
+The special
+.Cm rebase
+argument will instead use the output path set by the
+.Fl o
+option,
+so paths are written relative to the output file.
+.Pp
+The distinction matters when reading from bundles of files that refer to each other.
+For example,
+when copying
+.Pa in/manifest.ttl
+to
+.Pa out/manifest.ttl ,
+the relative URI reference
+.Ql <data.ttl>
+will be written as
+.Ql <../in/data.ttl>
+when using
+.Fl B
+.Cm rebase .
.It Fl C
Convert literals to canonical form.
Literals with supported XSD datatypes will be parsed and rewritten canonically.
@@ -155,26 +177,7 @@ The
.Cm empty
syntax suppresses the output,
so that only warnings and errors will be printed.
-.It Fl b Ar bytes
-I/O block size.
-This is the number of bytes in a file that will be read or written at once.
-The default is 4096, which should perform well in most cases.
-Note that this only applies to files, standard input and output are always processed one byte at a time.
-.It Fl f
-Fast and loose URI mode:
-preserve full URIs (without qualifying or making relative),
-and pass prefixed names through as-is.
-.It Fl h
-Print the command line options.
-.It Fl k Ar bytes
-Parser stack size.
-Parsing is performed using a pre-allocated stack for performance and security reasons.
-By default, the stack is 1 MiB, which should be sufficient for most data.
-This can be increased to support unusually structured data and huge literals,
-or decreased to reduce overall memory requirements and reduce startup time.
-.It Fl q
-Suppress all output except data.
-.It Fl r Ar root
+.It Fl R Ar root
Keep relative URIs within a
.Ar root
URI.
@@ -188,29 +191,44 @@ if
.Pa /home/you/file.ttl
is written to the file
.Pa /home/me/output.ttl
-using the destination's base URI,
-then it could be written as
+using
+.Fl B Cm rebase ,
+then it will be written as
.Li <../you/file.ttl> .
Setting
-.Fl r Li file:///home/me/
+.Fl R Pa /home/me/
would prevent references from
.Dq escaping
like this,
so the above would instead be written as
-.Li <file:///home/you/file.ttl> ,
-since it can't be expressed relative to the root URI.
+.Li <file:///home/you/file.ttl> .
.Pp
This is useful for keeping relative references within some directory.
-.It Fl s Ar string
-Parse
-.Ar string
-as input.
-.It Fl v
+.It Fl V
Display version information and exit.
-.It Fl w Ar filename
+.It Fl b Ar bytes
+I/O block size.
+This is the number of bytes in a file that will be read or written at once.
+The default is 4096, which should perform well in most cases.
+Note that this only applies to files; standard input and output are always processed one byte at a time.
+.It Fl h
+Print the command line options.
+.It Fl k Ar bytes
+Parser stack size.
+Parsing is performed using a pre-allocated stack for performance and security reasons.
+By default, the stack is 1 MiB, which should be sufficient for most data.
+This can be increased to support unusually structured data and huge literals,
+or decreased to reduce overall memory requirements and reduce startup time.
+.It Fl o Ar filename
Write output to the given
.Ar filename
instead of stdout.
+.It Fl q
+Suppress all output except data.
+.It Fl s Ar string
+Parse
+.Ar string
+as input.
.El
.Sh ENVIRONMENT
Errors and warnings are printed in color by default if the output is a terminal.
diff --git a/test/meson.build b/test/meson.build
index 0e887ccb..43fc7211 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -181,32 +181,27 @@ simple_command_tests = {
'bad': [
['-B', 'nonuriorpath'],
['-B'],
- ['-I', 'turtle'],
['-I', 'unknown'],
['-I'],
['-O', 'unknown'],
['-O'],
+ ['-R'],
['-b', '-1'],
['-b', '1024junk'],
- ['-b', '9223372036854775807'],
['-b'],
- ['-k', '-1'],
- ['-k', '1024junk'],
['-k', '9223372036854775807'],
['-k'],
['-qi'],
- ['-r'],
['-s', '<foo> a <Bar> .'],
['-s'],
- ['-w'],
['-z'],
],
'good': [
['--help'],
['--version'],
+ ['-V'],
['-h'],
['-k', '512', '-s', '<go:>a<go:> .'],
- ['-v'],
],
},
}
@@ -236,17 +231,30 @@ if is_variable('serd_pipe')
endforeach
endforeach
- test('none', serd_pipe, env: test_env, should_fail: true, suite: cmd_suite)
-
# Base URI options
test(
+ 'bad_rebase',
+ serd_pipe,
+ args: ['-B', 'rebase', serd_ttl],
+ env: test_env,
+ should_fail: true,
+ suite: cmd_suite,
+ )
+ test(
'base',
files('test_base.py'),
args: pipe_script_args,
env: test_env,
suite: cmd_suite,
)
+ test(
+ 'dir_base',
+ serd_pipe,
+ args: ['-B', serd_src_root / '', serd_ttl],
+ env: test_env,
+ suite: cmd_suite,
+ )
# Log
@@ -286,6 +294,21 @@ if is_variable('serd_pipe')
input_suite = ['tools', 'pipe', 'input']
+ good_input_tests = {
+ 'unknown_extension': [files('trig_unknown_extension.n3')],
+ 'no_extension': [files('trig_no_extension')],
+ }
+
+ foreach name, args : good_input_tests
+ test(
+ name,
+ serd_pipe,
+ args: args,
+ env: test_env,
+ suite: input_suite,
+ )
+ endforeach
+
bad_input_tests = {
'string': ['-s', '<foo> a <Bar> .'],
'no_such_file': ['no_such_file'],
@@ -513,7 +536,7 @@ test_suites = {
files('extra/root/manifest.ttl'),
ns_serdtest + 'root/',
'--',
- ['-r', 'http://example.org/top/root/'],
+ ['-R', 'http://example.org/top/root/'],
],
'terse': [
files('extra/terse/manifest.ttl'),
diff --git a/test/run_suite.py b/test/run_suite.py
index 16e527af..52a418ef 100755
--- a/test/run_suite.py
+++ b/test/run_suite.py
@@ -40,8 +40,7 @@ TEST_TYPES = [
def run_eval_test(command, in_path, good_path, out_path):
"""Run a positive eval test and return whether the output matches."""
- syntax = util.syntax_from_path(out_path)
- command = command + ["-O", syntax, "-w", out_path, in_path]
+ command = command + ["-o", out_path, in_path]
subprocess.check_call(command, encoding="utf-8")
with open(good_path, "r", encoding="utf-8") as good:
diff --git a/test/serd_test_util/__init__.py b/test/serd_test_util/__init__.py
index f465a4b7..5f0e0033 100644
--- a/test/serd_test_util/__init__.py
+++ b/test/serd_test_util/__init__.py
@@ -110,19 +110,6 @@ def file_path(suite_dir, uri):
return os.path.relpath(os.path.join(suite_dir, os.path.basename(uri)))
-def syntax_from_path(path):
- """Return the serd syntax name corresponding to a file path."""
-
- extensions = {
- ".ttl": "turtle",
- ".nt": "ntriples",
- ".trig": "trig",
- ".nq": "nquads",
- }
-
- return extensions[os.path.splitext(path)[1]]
-
-
def earl_assertion(test, passed, asserter):
"""Return a Turtle description of an assertion for the test report."""
diff --git a/test/test_stdin.py b/test/test_stdin.py
index 790d34ce..7a2ab34e 100755
--- a/test/test_stdin.py
+++ b/test/test_stdin.py
@@ -10,7 +10,7 @@
import serd_test_util as util
args = util.wrapper_args(__doc__)
-command = [args.tool, "-I", "ntriples", "-B", "http://example.org", "-"]
+command = [args.tool, "-I", "ntriples", "-B", "http://example.org"]
DOC = "<{0}s> <{0}p> <{0}o> .".format("http://example.org/")
diff --git a/test/trig_no_extension b/test/trig_no_extension
new file mode 100644
index 00000000..e1c292d9
--- /dev/null
+++ b/test/trig_no_extension
@@ -0,0 +1,11 @@
+# Copyright 2023 David Robillard <d@drobilla.net>
+# SPDX-License-Identifier: 0BSD OR ISC
+
+@prefix eg: <http://example.org/> .
+
+eg:g {
+ eg:s
+ eg:p [
+ a eg:Object
+ ] .
+}
diff --git a/test/trig_unknown_extension.n3 b/test/trig_unknown_extension.n3
new file mode 100644
index 00000000..e1c292d9
--- /dev/null
+++ b/test/trig_unknown_extension.n3
@@ -0,0 +1,11 @@
+# Copyright 2023 David Robillard <d@drobilla.net>
+# SPDX-License-Identifier: 0BSD OR ISC
+
+@prefix eg: <http://example.org/> .
+
+eg:g {
+ eg:s
+ eg:p [
+ a eg:Object
+ ] .
+}
diff --git a/tools/.clang-tidy b/tools/.clang-tidy
index f7c82d78..113631df 100644
--- a/tools/.clang-tidy
+++ b/tools/.clang-tidy
@@ -8,5 +8,4 @@ Checks: >
-concurrency-mt-unsafe,
-hicpp-signed-bitwise,
-llvm-header-guard,
- -readability-function-cognitive-complexity,
InheritParentConfig: true
diff --git a/tools/console.c b/tools/console.c
index 94c6dc79..2a861d1b 100644
--- a/tools/console.c
+++ b/tools/console.c
@@ -3,8 +3,15 @@
#include "console.h"
-#include "serd/serd.h"
+#include "serd/log.h"
+#include "serd/node.h"
+#include "serd/stream.h"
+#include "serd/string.h"
+#include "serd/syntax.h"
+#include "serd/uri.h"
+#include "serd/version.h"
#include "zix/allocator.h"
+#include "zix/attributes.h"
#include "zix/filesystem.h"
#ifdef _WIN32
@@ -15,9 +22,86 @@
# include <io.h>
#endif
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
#include <stdint.h>
+#include <stdlib.h>
#include <string.h>
+#define MAX_DEPTH 128U
+
+ZIX_PURE_FUNC bool
+serd_option_iter_is_end(const OptionIter iter)
+{
+ return iter.a >= iter.argc || iter.argv[iter.a][0] != '-' ||
+ !iter.argv[iter.a][iter.f];
+}
+
+SerdStatus
+serd_option_iter_advance(OptionIter* const iter)
+{
+ if (!iter->argv[iter->a][++iter->f]) {
+ ++iter->a;
+ iter->f = 1;
+ }
+
+ return SERD_SUCCESS;
+}
+
+SerdStatus
+serd_tool_setup(SerdTool* const tool,
+ const char* const program,
+ SerdCommonOptions options)
+{
+ // Open the output first, since if that fails we have nothing to do
+ const char* const out_path = options.out_filename;
+ if (!((tool->out = serd_open_tool_output(out_path)).stream)) {
+ fprintf(stderr,
+ "%s: failed to open output file (%s)\n",
+ program,
+ strerror(errno));
+ return SERD_BAD_STREAM;
+ }
+
+ // We have something to write to, so build the writing environment
+ const SerdLimits limits = {options.stack_size, MAX_DEPTH};
+ if (!(tool->world = serd_world_new(NULL)) ||
+ serd_world_set_limits(tool->world, limits) ||
+ !(tool->env = serd_create_env(
+ NULL, program, options.base_uri, options.out_filename)) ||
+ !(tool->writer = serd_writer_new(
+ tool->world,
+ serd_choose_syntax(
+ tool->world, options.output, options.out_filename, SERD_NQUADS),
+ options.output.flags,
+ tool->env,
+ &tool->out,
+ options.block_size))) {
+ fprintf(stderr, "%s: failed to set up writing environment\n", program);
+ return SERD_UNKNOWN_ERROR;
+ }
+
+ return SERD_SUCCESS;
+}
+
+SerdStatus
+serd_tool_cleanup(SerdTool tool)
+{
+ SerdStatus st = SERD_SUCCESS;
+ if (tool.out.stream) {
+ // Close the output stream explicitly to check if there were any errors
+ if ((st = serd_close_output(&tool.out))) {
+ perror("write error");
+ }
+ }
+
+ serd_writer_free(tool.writer);
+ serd_env_free(tool.env);
+ serd_world_free(tool.world);
+ return st;
+}
+
void
serd_set_stream_utf8_mode(FILE* const stream)
{
@@ -28,7 +112,7 @@ serd_set_stream_utf8_mode(FILE* const stream)
#endif
}
-int
+SerdStatus
serd_print_version(const char* const program)
{
printf("%s %d.%d.%d <http://drobilla.net/software/serd>\n",
@@ -42,34 +126,90 @@ serd_print_version(const char* const program)
"This is free software; you are free to change and redistribute it.\n"
"There is NO WARRANTY, to the extent permitted by law.\n");
- return 0;
+ return SERD_FAILURE;
+}
+
+SerdStatus
+serd_get_argument(OptionIter* const iter, const char** const argument)
+{
+ const char flag = iter->argv[iter->a][iter->f++];
+
+ if (iter->argv[iter->a][iter->f] || (iter->a + 1) == iter->argc) {
+ fprintf(
+ stderr, "%s: option requires an argument -- %c\n", iter->argv[0], flag);
+ return SERD_BAD_ARG;
+ }
+
+ *argument = iter->argv[++iter->a];
+ ++iter->a;
+ iter->f = 1;
+ return SERD_SUCCESS;
+}
+
+SerdStatus
+serd_get_size_argument(OptionIter* const iter, size_t* const argument)
+{
+ SerdStatus st = SERD_SUCCESS;
+ const char* string = NULL;
+ if ((st = serd_get_argument(iter, &string))) {
+ return st;
+ }
+
+ char* endptr = NULL;
+ const long size = strtol(string, &endptr, 10);
+ if (size <= 0 || size == LONG_MAX || *endptr != '\0') {
+ return SERD_BAD_ARG;
+ }
+
+ *argument = (size_t)size;
+ return SERD_SUCCESS;
}
SerdStatus
serd_set_base_uri_from_path(SerdEnv* const env, const char* const path)
{
- char* const input_path = zix_canonical_path(NULL, path);
- if (!input_path) {
+ const size_t path_len = strlen(path);
+ char* const real_path = zix_canonical_path(NULL, path);
+ if (!real_path) {
return SERD_BAD_ARG;
}
- SerdNode* const file_uri = serd_node_new(
- NULL, serd_a_file_uri(serd_string(input_path), serd_empty_string()));
+ const size_t real_path_len = strlen(real_path);
+ SerdNode* base_node = NULL;
+ if (path[path_len - 1] == '/' || path[path_len - 1] == '\\') {
+ char* const base_path = (char*)calloc(real_path_len + 2, 1);
+
+ memcpy(base_path, real_path, real_path_len + 1);
+ base_path[real_path_len] = path[path_len - 1];
- serd_env_set_base_uri(env, serd_node_string_view(file_uri));
- serd_node_free(NULL, file_uri);
- zix_free(NULL, input_path);
+ base_node = serd_node_new(
+ NULL, serd_a_file_uri(serd_string(base_path), serd_empty_string()));
+
+ free(base_path);
+ } else {
+ base_node = serd_node_new(
+ NULL, serd_a_file_uri(serd_string(real_path), serd_empty_string()));
+ }
+
+ serd_env_set_base_uri(env, serd_node_string_view(base_node));
+ serd_node_free(NULL, base_node);
+ zix_free(NULL, real_path);
return SERD_SUCCESS;
}
SerdSyntax
-serd_choose_syntax(SerdWorld* const world,
- const SerdSyntax requested,
- const char* const filename)
+serd_choose_syntax(SerdWorld* const world,
+ const SerdSyntaxOptions options,
+ const char* const filename,
+ const SerdSyntax fallback)
{
- if (requested) {
- return requested;
+ if (options.overridden || options.syntax != SERD_SYNTAX_EMPTY) {
+ return options.syntax;
+ }
+
+ if (!filename || !strcmp(filename, "-")) {
+ return fallback;
}
const SerdSyntax guessed = serd_guess_syntax(filename);
@@ -118,7 +258,26 @@ serd_set_input_option(const SerdStringView name,
}
}
- return SERD_FAILURE;
+ return SERD_BAD_ARG;
+}
+
+SerdStatus
+serd_parse_input_argument(OptionIter* const iter,
+ SerdSyntaxOptions* const options)
+{
+ SerdStatus st = SERD_SUCCESS;
+ const char* argument = NULL;
+
+ if (!(st = serd_get_argument(iter, &argument))) {
+ if ((st = serd_set_input_option(
+ serd_string(argument), &options->syntax, &options->flags))) {
+ fprintf(stderr, "%s: unknown option \"%s\"\n", iter->argv[0], argument);
+ } else if (!strcmp(argument, "empty") || options->syntax) {
+ options->overridden = true;
+ }
+ }
+
+ return st;
}
SerdStatus
@@ -154,9 +313,87 @@ serd_set_output_option(const SerdStringView name,
}
}
+ return SERD_BAD_ARG;
+}
+
+SerdStatus
+serd_parse_output_argument(OptionIter* const iter,
+ SerdSyntaxOptions* const options)
+{
+ SerdStatus st = SERD_SUCCESS;
+ const char* argument = NULL;
+
+ if (!(st = serd_get_argument(iter, &argument))) {
+ if ((st = serd_set_output_option(
+ serd_string(argument), &options->syntax, &options->flags))) {
+ fprintf(stderr, "%s: unknown option \"%s\"\n", iter->argv[0], argument);
+ } else if (!strcmp(argument, "empty") || options->syntax) {
+ options->overridden = true;
+ }
+ }
+
+ return st;
+}
+
+SerdStatus
+serd_parse_common_option(OptionIter* const iter, SerdCommonOptions* const opts)
+{
+ const char opt = iter->argv[iter->a][iter->f];
+ switch (opt) {
+ case 'B':
+ return serd_get_argument(iter, &opts->base_uri);
+
+ case 'I':
+ return serd_parse_input_argument(iter, &opts->input);
+
+ case 'O':
+ return serd_parse_output_argument(iter, &opts->output);
+
+ case 'b':
+ return serd_get_size_argument(iter, &opts->block_size);
+
+ case 'k':
+ return serd_get_size_argument(iter, &opts->stack_size);
+
+ case 'o':
+ return serd_get_argument(iter, &opts->out_filename);
+
+ default:
+ break;
+ }
+
return SERD_FAILURE;
}
+/**
+   Create the environment for a tool, with its base URI set from the -B option.
+
+   @param allocator Allocator passed through to serd_env_new().
+   @param program Program name, used as a prefix for error messages.
+   @param base_string The -B argument: a URI with a scheme, a filesystem
+   path, the keyword "rebase", or NULL/empty for no explicit base.
+   @param out_filename The -o argument, or NULL if none was given.  This is
+   required, and used as the base path, when base_string is "rebase".
+
+   @return A new environment, or NULL (after printing a message to stderr) if
+   "rebase" is given without an output filename, or if the base path can not
+   be converted to a base URI.
+*/
+SerdEnv*
+serd_create_env(SerdAllocator* const allocator,
+                const char* const    program,
+                const char* const    base_string,
+                const char* const    out_filename)
+{
+  const bool is_rebase = base_string && !strcmp(base_string, "rebase");
+  if (is_rebase && !out_filename) {
+    fprintf(stderr, "%s: rebase requires an output filename\n", program);
+    return NULL;
+  }
+
+  // A string with a scheme is already a URI, so use it directly
+  if (base_string && serd_uri_string_has_scheme(base_string)) {
+    return serd_env_new(allocator, serd_string(base_string));
+  }
+
+  SerdEnv* const env = serd_env_new(allocator, serd_empty_string());
+  if (base_string && base_string[0]) {
+    // "rebase" is a keyword, not a path: the base is the output file instead
+    const char* const base_path = is_rebase ? out_filename : base_string;
+    const SerdStatus  st = serd_set_base_uri_from_path(env, base_path);
+    if (st) {
+      fprintf(stderr, "%s: invalid base URI \"%s\"\n", program, base_string);
+      serd_env_free(env);
+      return NULL;
+    }
+  }
+
+  return env;
+}
+
+
/// Wrapper for getc that is compatible with SerdReadFunc but faster than fread
static size_t
serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream)
@@ -201,3 +438,65 @@ serd_open_tool_output(const char* const filename)
return serd_open_output_file(filename);
}
+
+SerdStatus
+serd_read_source(SerdWorld* const world,
+ const SerdCommonOptions opts,
+ SerdEnv* const env,
+ const SerdSyntax syntax,
+ SerdInputStream* const in,
+ const char* const name,
+ const SerdSink* const sink)
+{
+ SerdReader* const reader =
+ serd_reader_new(world, syntax, opts.input.flags, env, sink);
+
+ SerdNode* const name_node = serd_node_new(NULL, serd_a_string(name));
+ SerdStatus st = serd_reader_start(reader, in, name_node, opts.block_size);
+ serd_node_free(NULL, name_node);
+ if (!st) {
+ st = serd_reader_read_document(reader);
+ }
+
+ serd_reader_free(reader);
+ return st;
+}
+
+SerdStatus
+serd_read_inputs(SerdWorld* const world,
+ const SerdCommonOptions opts,
+ SerdEnv* const env,
+ const intptr_t n_inputs,
+ char* const* const inputs,
+ const SerdSink* const sink)
+{
+ SerdStatus st = SERD_SUCCESS;
+
+ for (intptr_t i = 0; !st && i < n_inputs; ++i) {
+ // Use the filename as the base URI if possible if user didn't override it
+ const char* const in_path = inputs[i];
+ if (!opts.base_uri[0] && !!strcmp(in_path, "-")) {
+ serd_set_base_uri_from_path(env, in_path);
+ }
+
+ // Open the input stream
+ SerdInputStream in = serd_open_tool_input(in_path);
+ if (!in.stream) {
+ return SERD_BAD_ARG;
+ }
+
+ // Read the entire file
+ st = serd_read_source(
+ world,
+ opts,
+ env,
+ serd_choose_syntax(world, opts.input, in_path, SERD_TRIG),
+ &in,
+ !strcmp(in_path, "-") ? "stdin" : in_path,
+ sink);
+
+ serd_close_input(&in);
+ }
+
+ return st;
+}
diff --git a/tools/console.h b/tools/console.h
index a7e8423f..d475aebc 100644
--- a/tools/console.h
+++ b/tools/console.h
@@ -6,29 +6,85 @@
#include "serd/env.h"
#include "serd/input_stream.h"
+#include "serd/memory.h"
#include "serd/output_stream.h"
#include "serd/reader.h"
+#include "serd/sink.h"
#include "serd/status.h"
#include "serd/string_view.h"
#include "serd/syntax.h"
#include "serd/world.h"
#include "serd/writer.h"
+#include <stdbool.h>
+#include <stdint.h>
#include <stdio.h>
+// Iterator over command-line options with support for BSD-style flag merging
+typedef struct {
+ char* const* argv; ///< Complete argument vector (from main)
+ int argc; ///< Total number of arguments (from main)
+ int a; ///< Argument index (index into argv)
+ int f; ///< Flag index (offset in argv[arg])
+} OptionIter;
+
+// Options for the input or output syntax
+typedef struct {
+ SerdSyntax syntax; ///< User-specified syntax, or empty
+ uint32_t flags; ///< SerdReaderFlags or SerdWriterFlags
+ bool overridden; ///< True if syntax was explicitly given
+} SerdSyntaxOptions;
+
+// Options common to all command-line tools
+typedef struct {
+ const char* base_uri;
+ const char* out_filename;
+ size_t block_size;
+ size_t stack_size;
+ SerdSyntaxOptions input;
+ SerdSyntaxOptions output;
+} SerdCommonOptions;
+
+// Common "global" state of a command-line tool that writes data
+typedef struct {
+ SerdOutputStream out;
+ SerdWorld* world;
+ SerdEnv* env;
+ SerdWriter* writer;
+} SerdTool;
+
+bool
+serd_option_iter_is_end(OptionIter iter);
+
+SerdStatus
+serd_option_iter_advance(OptionIter* iter);
+
+SerdStatus
+serd_tool_setup(SerdTool* tool, const char* program, SerdCommonOptions options);
+
+SerdStatus
+serd_tool_cleanup(SerdTool tool);
+
void
serd_set_stream_utf8_mode(FILE* stream);
-int
+SerdStatus
serd_print_version(const char* program);
SerdStatus
serd_set_base_uri_from_path(SerdEnv* env, const char* path);
SerdSyntax
-serd_choose_syntax(SerdWorld* world,
- SerdSyntax requested,
- const char* filename);
+serd_choose_syntax(SerdWorld* world,
+ SerdSyntaxOptions options,
+ const char* filename,
+ SerdSyntax fallback);
+
+SerdStatus
+serd_get_argument(OptionIter* iter, const char** argument);
+
+SerdStatus
+serd_get_size_argument(OptionIter* iter, size_t* argument);
SerdStatus
serd_set_input_option(SerdStringView name,
@@ -36,14 +92,46 @@ serd_set_input_option(SerdStringView name,
SerdReaderFlags* flags);
SerdStatus
+serd_parse_input_argument(OptionIter* iter, SerdSyntaxOptions* options);
+
+SerdStatus
serd_set_output_option(SerdStringView name,
SerdSyntax* syntax,
SerdWriterFlags* flags);
+SerdStatus
+serd_parse_output_argument(OptionIter* iter, SerdSyntaxOptions* options);
+
+SerdStatus
+serd_parse_common_option(OptionIter* iter, SerdCommonOptions* opts);
+
+SerdEnv*
+serd_create_env(SerdAllocator* allocator,
+ const char* program,
+ const char* base_string,
+ const char* out_filename);
+
SerdInputStream
serd_open_tool_input(const char* filename);
SerdOutputStream
serd_open_tool_output(const char* filename);
+SerdStatus
+serd_read_source(SerdWorld* world,
+ SerdCommonOptions opts,
+ SerdEnv* env,
+ SerdSyntax syntax,
+ SerdInputStream* in,
+ const char* name,
+ const SerdSink* sink);
+
+SerdStatus
+serd_read_inputs(SerdWorld* world,
+ SerdCommonOptions opts,
+ SerdEnv* env,
+ intptr_t n_inputs,
+ char* const* inputs,
+ const SerdSink* sink);
+
#endif // SERD_TOOLS_CONSOLE_H
diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c
index 66600006..bbed9fa8 100644
--- a/tools/serd-pipe.c
+++ b/tools/serd-pipe.c
@@ -4,344 +4,215 @@
#include "console.h"
#include "serd/canon.h"
-#include "serd/env.h"
#include "serd/input_stream.h"
#include "serd/log.h"
-#include "serd/node.h"
-#include "serd/output_stream.h"
#include "serd/reader.h"
#include "serd/sink.h"
#include "serd/status.h"
#include "serd/string_view.h"
#include "serd/syntax.h"
-#include "serd/world.h"
#include "serd/writer.h"
-#include "zix/allocator.h"
-#include "zix/filesystem.h"
-#include <errno.h>
-#include <limits.h>
#include <stdbool.h>
+#include <stdint.h>
#include <stdio.h>
-#include <stdlib.h>
#include <string.h>
-#define SERDI_ERROR(msg) fprintf(stderr, "serd-pipe: " msg)
-#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serd-pipe: " fmt, __VA_ARGS__)
+/* Application (after parsing command-line arguments) */
-#define MAX_DEPTH 128U
-
-static int
-print_usage(const char* const name, const bool error)
-{
- static const char* const description =
- "Read and write RDF syntax.\n"
- "Use - for INPUT to read from standard input.\n\n"
- " -B BASE_URI Base URI.\n"
- " -C Convert literals to canonical form.\n"
- " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n"
- " or flag (lax/variables/verbatim).\n"
- " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n"
- " or flag (ascii/expanded/verbatim/terse/lax).\n"
- " -b BYTES I/O block size.\n"
- " -h Display this help and exit.\n"
- " -k BYTES Parser stack size.\n"
- " -q Suppress all output except data.\n"
- " -r ROOT_URI Keep relative URIs within ROOT_URI.\n"
- " -s STRING Parse STRING as input.\n"
- " -v Display version information and exit.\n"
- " -w FILENAME Write output to FILENAME instead of stdout.\n";
-
- FILE* const os = error ? stderr : stdout;
- fprintf(os, "%s", error ? "\n" : "");
- fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
- fprintf(os, "%s", description);
- return error ? 1 : 0;
-}
-
-static int
-missing_arg(const char* const name, const char opt)
-{
- SERDI_ERRORF("option requires an argument -- '%c'\n", opt);
- return print_usage(name, true);
-}
+// All options
+typedef struct {
+ SerdCommonOptions common;
+ const char* root_uri;
+ const char* input_string;
+ char* const* inputs;
+ intptr_t n_inputs;
+ bool canonical;
+ bool quiet;
+} Options;
+// Run the tool using the given options
static SerdStatus
-read_file(SerdWorld* const world,
- const SerdSyntax syntax,
- const SerdReaderFlags flags,
- SerdEnv* const env,
- const SerdSink* const sink,
- const size_t stack_size,
- const char* const filename,
- const size_t block_size)
+run(const Options opts)
{
- SerdInputStream in = serd_open_tool_input(filename);
- if (!in.stream) {
- SERDI_ERRORF(
- "failed to open input file `%s' (%s)\n", filename, strerror(errno));
+ SerdTool app = {{NULL, NULL, NULL, NULL}, NULL, NULL, NULL};
- return SERD_BAD_STREAM;
+ // Set up the writing environment
+ SerdStatus st = SERD_SUCCESS;
+ if ((st = serd_tool_setup(&app, "serd-pipe", opts.common))) {
+ serd_tool_cleanup(app);
+ return st;
}
- const SerdLimits limits = {stack_size, MAX_DEPTH};
- serd_world_set_limits(world, limits);
+ if (opts.quiet) {
+ serd_set_log_func(app.world, serd_quiet_log_func, NULL);
+ }
- SerdReader* reader = serd_reader_new(world, syntax, flags, env, sink);
- SerdStatus st = serd_reader_start(reader, &in, NULL, block_size);
+ serd_writer_set_root_uri(app.writer, serd_string(opts.root_uri));
- st = st ? st : serd_reader_read_document(reader);
+ // Set up the output pipeline: [canon] -> writer
+ const SerdSink* const target = serd_writer_sink(app.writer);
+ const SerdSink* sink = target;
+ SerdSink* canon = NULL;
+ if (opts.canonical) {
+ canon = serd_canon_new(app.world, target, opts.common.input.flags);
+ sink = canon;
+ }
- serd_reader_free(reader);
- serd_close_input(&in);
+ if (opts.input_string) {
+ const char* position = opts.input_string;
+ SerdInputStream in = serd_open_input_string(&position);
- return st;
-}
+ st = serd_read_source(
+ app.world,
+ opts.common,
+ app.env,
+ serd_choose_syntax(app.world, opts.common.input, NULL, SERD_TRIG),
+ &in,
+ "string",
+ sink);
-int
-main(int argc, char** argv)
-{
- const char* const prog = argv[0];
-
- SerdNode* base = NULL;
- SerdSyntax input_syntax = SERD_SYNTAX_EMPTY;
- SerdSyntax output_syntax = SERD_SYNTAX_EMPTY;
- SerdReaderFlags reader_flags = 0;
- SerdWriterFlags writer_flags = 0;
- bool osyntax_set = false;
- bool canonical = false;
- bool quiet = false;
- size_t block_size = 4096U;
- size_t stack_size = 1048576U;
- const char* input_string = NULL;
- const char* root_uri = NULL;
- const char* out_filename = NULL;
- int a = 1;
- for (; a < argc && argv[a][0] == '-'; ++a) {
- if (argv[a][1] == '\0') {
- break;
- }
+ serd_close_input(&in);
+ }
- if (!strcmp(argv[a], "--help")) {
- return print_usage(prog, false);
- }
+ // Read all the inputs, which drives the writer to emit the output
+ if (st ||
+ (st = serd_read_inputs(
+ app.world, opts.common, app.env, opts.n_inputs, opts.inputs, sink)) ||
+ (st = serd_writer_finish(app.writer))) {
+ serd_sink_free(canon);
+ serd_tool_cleanup(app);
+ return st;
+ }
- if (!strcmp(argv[a], "--version")) {
- return serd_print_version(argv[0]);
- }
+ serd_sink_free(canon);
+ return serd_tool_cleanup(app);
+}
- for (int o = 1; argv[a][o]; ++o) {
- const char opt = argv[a][o];
-
- if (opt == 'C') {
- canonical = true;
- } else if (opt == 'h') {
- return print_usage(prog, false);
- } else if (opt == 'q') {
- quiet = true;
- } else if (opt == 'v') {
- return serd_print_version(argv[0]);
- } else if (argv[a][1] == 'B') {
- if (++a == argc) {
- return missing_arg(prog, 'B');
- }
-
- base = serd_node_new(NULL, serd_a_uri_string(argv[a]));
- break;
- } else if (opt == 'I') {
- if (argv[a][o + 1] || ++a == argc) {
- return missing_arg(prog, 'I');
- }
-
- if (serd_set_input_option(
- serd_string(argv[a]), &input_syntax, &reader_flags)) {
- return print_usage(argv[0], true);
- }
- break;
- } else if (opt == 'O') {
- if (argv[a][o + 1] || ++a == argc) {
- return missing_arg(prog, 'O');
- }
-
- if (serd_set_output_option(
- serd_string(argv[a]), &output_syntax, &writer_flags)) {
- return print_usage(argv[0], true);
- }
-
- osyntax_set =
- output_syntax != SERD_SYNTAX_EMPTY || !strcmp(argv[a], "empty");
-
- break;
- } else if (opt == 'b') {
- if (argv[a][o + 1] || ++a == argc) {
- return missing_arg(prog, 'b');
- }
-
- char* endptr = NULL;
- const long size = strtol(argv[a], &endptr, 10);
- if (size < 1 || size == LONG_MAX || *endptr != '\0') {
- SERDI_ERRORF("invalid block size `%s'\n", argv[a]);
- return 1;
- }
- block_size = (size_t)size;
- break;
- } else if (opt == 'k') {
- if (argv[a][o + 1] || ++a == argc) {
- return missing_arg(prog, 'k');
- }
-
- char* endptr = NULL;
- const long size = strtol(argv[a], &endptr, 10);
- if (size <= 0 || size == LONG_MAX || *endptr != '\0') {
- SERDI_ERRORF("invalid stack size '%s'\n", argv[a]);
- return 1;
- }
- stack_size = (size_t)size;
- break;
- } else if (opt == 'r') {
- if (argv[a][o + 1] || ++a == argc) {
- return missing_arg(prog, 'r');
- }
-
- root_uri = argv[a];
- break;
- } else if (opt == 's') {
- if (argv[a][o + 1] || ++a == argc) {
- return missing_arg(prog, 's');
- }
-
- input_string = argv[a];
- break;
- } else if (opt == 'w') {
- if (argv[a][o + 1] || ++a == argc) {
- return missing_arg(argv[0], 'w');
- }
-
- out_filename = argv[a];
- break;
- } else {
- SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1);
- return print_usage(prog, true);
- }
- }
- }
+/* Command-line interface (before setting up serd) */
- if (a == argc && !input_string) {
- SERDI_ERROR("missing input\n");
- return print_usage(prog, true);
- }
+static int
+print_usage(const char* const name, const bool error)
+{
+ static const char* const description =
+ "Read and write RDF data.\n"
+ "INPUT can be a local filename, or \"-\" to read from standard input.\n\n"
+ " -B BASE_URI Base URI or path for resolving relative references.\n"
+ " -C Convert literals to canonical form.\n"
+ " -I SYNTAX Input syntax turtle/ntriples/trig/nquads, or option\n"
+ " lax/variables/relative/global/generated.\n"
+ " -O SYNTAX Output syntax empty/turtle/ntriples/nquads, or option\n"
+ " ascii/expanded/verbatim/terse/lax.\n"
+ " -R ROOT_URI Keep relative URIs within ROOT_URI.\n"
+ " -V Display version information and exit.\n"
+ " -b BYTES I/O block size.\n"
+ " -h Display this help and exit.\n"
+ " -k BYTES Parser stack size.\n"
+ " -o FILENAME Write output to FILENAME instead of stdout.\n"
+ " -q Suppress warning and error output.\n"
+ " -s STRING Parse STRING as input.\n";
- serd_set_stream_utf8_mode(stdin);
+ FILE* const os = error ? stderr : stdout;
+ fprintf(os, "%s", error ? "\n" : "");
+ fprintf(os, "Usage: %s [OPTION]... [INPUT]...\n", name);
+ fprintf(os, "%s", description);
+ return error;
+}
- char* const* const inputs = argv + a;
- const int n_inputs = argc - a;
+// Parse the option pointed to by `iter`, and advance it to the next one
+static SerdStatus
+parse_option(OptionIter* const iter, Options* const opts)
+{
+#define ARG_ERRORF(fmt, ...) \
+ fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__)
- bool input_has_graphs = serd_syntax_has_graphs(input_syntax);
- for (int i = a; i < argc; ++i) {
- if (serd_syntax_has_graphs(serd_guess_syntax(argv[i]))) {
- input_has_graphs = true;
- break;
- }
+ SerdStatus st = serd_parse_common_option(iter, &opts->common);
+ if (st != SERD_FAILURE) {
+ return st;
}
- if (!output_syntax && !osyntax_set) {
- output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES;
+ if (!strcmp(iter->argv[iter->a], "--help")) {
+ print_usage(iter->argv[0], false);
+ return SERD_FAILURE;
}
- if (!base && n_inputs == 1 &&
- (output_syntax == SERD_NQUADS || output_syntax == SERD_NTRIPLES)) {
- // Choose base URI from the single input path
- char* const input_path = zix_canonical_path(NULL, inputs[0]);
- if (!input_path ||
- !(base = serd_node_new(
- NULL,
- serd_a_file_uri(serd_string(input_path), serd_empty_string())))) {
- SERDI_ERRORF("unable to determine base URI from path %s\n", inputs[0]);
- }
- zix_free(NULL, input_path);
+ if (!strcmp(iter->argv[iter->a], "--version")) {
+ return serd_print_version(iter->argv[0]);
}
- SerdWorld* const world = serd_world_new(NULL);
- const SerdLimits limits = {stack_size, MAX_DEPTH};
- serd_world_set_limits(world, limits);
-
- SerdEnv* const env = serd_env_new(
- NULL, base ? serd_node_string_view(base) : serd_empty_string());
+ const char opt = iter->argv[iter->a][iter->f];
+ switch (opt) {
+ case 'C':
+ opts->canonical = true;
+ return serd_option_iter_advance(iter);
- SerdOutputStream out = serd_open_tool_output(out_filename);
- if (!out.stream) {
- perror("serdi: error opening output file");
- return 1;
- }
+ case 'R':
+ return serd_get_argument(iter, &opts->root_uri);
- SerdWriter* const writer =
- serd_writer_new(world, output_syntax, writer_flags, env, &out, block_size);
+ case 'V':
+ return serd_print_version("serd-pipe");
- const SerdSink* sink = serd_writer_sink(writer);
+ case 'h':
+ print_usage(iter->argv[0], false);
+ return SERD_FAILURE;
- SerdSink* canon = NULL;
- if (canonical) {
- sink = canon = serd_canon_new(world, sink, reader_flags);
- }
+ case 'q':
+ opts->quiet = true;
+ return serd_option_iter_advance(iter);
- if (quiet) {
- serd_set_log_func(world, serd_quiet_log_func, NULL);
- }
+ case 's':
+ return serd_get_argument(iter, &opts->input_string);
- if (root_uri) {
- serd_writer_set_root_uri(writer, serd_string(root_uri));
+ default:
+ break;
}
- SerdStatus st = SERD_SUCCESS;
- if (input_string) {
- const char* position = input_string;
- SerdInputStream string_in = serd_open_input_string(&position);
+ ARG_ERRORF("invalid option -- '%c'\n", opt);
+ return SERD_BAD_ARG;
- SerdReader* const reader = serd_reader_new(
- world, input_syntax ? input_syntax : SERD_TRIG, reader_flags, env, sink);
+#undef ARG_ERRORF
+}
- if (!(st = serd_reader_start(reader, &string_in, NULL, 1U))) {
- st = serd_reader_read_document(reader);
+int
+main(const int argc, char* const* const argv)
+{
+ char default_input[] = {'-', '\0'};
+ char* default_inputs[] = {default_input};
+
+ Options opts = {{"",
+ NULL,
+ 4096U,
+ 1048576U,
+ {SERD_SYNTAX_EMPTY, 0U, false},
+ {SERD_SYNTAX_EMPTY, 0U, false}},
+ "",
+ NULL,
+ NULL,
+ 0U,
+ false,
+ false};
+
+ // Parse all command line options (which must precede inputs)
+ SerdStatus st = SERD_SUCCESS;
+ OptionIter iter = {argv, argc, 1, 1};
+ while (!serd_option_iter_is_end(iter)) {
+ if ((st = parse_option(&iter, &opts))) {
+ return (st == SERD_FAILURE) ? 0 : print_usage(argv[0], true);
}
-
- serd_reader_free(reader);
- serd_close_input(&string_in);
- }
-
- if (n_inputs == 1) {
- reader_flags |= SERD_READ_GLOBAL;
}
- for (int i = 0; !st && i < n_inputs; ++i) {
- if (!base && !!strcmp(inputs[i], "-")) {
- if ((st = serd_set_base_uri_from_path(env, inputs[i]))) {
- SERDI_ERRORF("failed to set base URI from path %s\n", inputs[i]);
- break;
- }
- }
-
- if ((st = read_file(world,
- serd_choose_syntax(world, input_syntax, inputs[i]),
- reader_flags,
- env,
- sink,
- stack_size,
- inputs[i],
- block_size))) {
- break;
- }
+ // Every argument past the last option is an input
+ opts.inputs = argv + iter.a;
+ opts.n_inputs = argc - iter.a;
+ if (opts.n_inputs + (bool)opts.input_string == 0) {
+ opts.n_inputs = 1;
+ opts.inputs = default_inputs;
}
- serd_sink_free(canon);
- serd_writer_free(writer);
- serd_env_free(env);
- serd_node_free(NULL, base);
- serd_world_free(world);
-
- if (serd_close_output(&out)) {
- perror("serd-pipe: write error");
- st = SERD_BAD_STREAM;
+ // Don't add prefixes to blank node labels if there is only one input
+ if (opts.n_inputs + (bool)opts.input_string == 1) {
+ opts.common.input.flags |= SERD_READ_GLOBAL;
}
- return (st > SERD_FAILURE) ? 1 : 0;
+ return run(opts) > SERD_FAILURE;
}