diff options
-rw-r--r-- | .gitlab-ci.yml | 4 | ||||
-rw-r--r-- | .reuse/dep5 | 2 | ||||
-rw-r--r-- | doc/man/serd-pipe.1 | 4 | ||||
-rw-r--r-- | test/meson.build | 8 | ||||
-rw-r--r-- | test/multifile/input1.ttl | 2 | ||||
-rw-r--r-- | test/multifile/input2.trig | 7 | ||||
-rw-r--r-- | test/multifile/output.nq | 3 | ||||
-rwxr-xr-x | test/test_multifile.py | 32 | ||||
-rw-r--r-- | tools/console.c | 34 | ||||
-rw-r--r-- | tools/console.h | 5 | ||||
-rw-r--r-- | tools/serd-pipe.c | 165 |
11 files changed, 222 insertions, 44 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 19937395..cc5e5b29 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,11 +17,11 @@ dev: stage: build image: lv2plugin/debian-x64 script: - - meson setup build -Db_coverage=true -Dbuildtype=debug -Dc_std=c11 -Ddocs=enabled -Dlint=true -Dprefix=/ -Dwarning_level=3 -Dwerror=true + - meson setup build -Db_coverage=true -Dbuildtype=debug -Dc_std=c11 -Ddocs=enabled -Dprefix=/ -Dwarning_level=3 -Dwerror=true - ninja -C build test - ninja -C build coverage-html - DESTDIR=$(pwd)/build/dest meson install -C build - - meson configure -Dbuildtype=release -Db_coverage=false -Dlint=false build + - meson configure -Dbuildtype=release -Db_coverage=false build - ninja -C build test coverage: '/ *lines\.*: \d+\.\d+.*/' artifacts: diff --git a/.reuse/dep5 b/.reuse/dep5 index 1c237f2b..26922e03 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -8,7 +8,7 @@ Copyright: 2010 World Wide Web Consortium, (MIT, ERCIM, Keio, Beihang) and other Comment: Standard test suites from the W3C License: BSD-3-Clause -Files: test/extra/* +Files: test/extra/* test/multifile/* Copyright: 2011-2023 David Robillard <d@drobilla.net> Comment: Extra test suites for serd License: BSD-3-Clause OR ISC diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1 index f53761c8..b8d2fd23 100644 --- a/doc/man/serd-pipe.1 +++ b/doc/man/serd-pipe.1 @@ -17,11 +17,11 @@ .Op Fl p Ar prefix .Op Fl r Ar root .Op Fl s Ar string -.Ar input +.Op Ar input ... .Sh DESCRIPTION .Nm is a fast command-line utility for streaming and processing RDF data. -It reads an RDF document and writes the data to stdout, +It reads one or more RDF documents and writes the data to stdout, possibly transformed and/or in a different syntax. By default, the input syntax is guessed from the file extension, diff --git a/test/meson.build b/test/meson.build index fd8586a8..489a6ee5 100644 --- a/test/meson.build +++ b/test/meson.build @@ -18,6 +18,7 @@ simple_script_paths = [ 'run_suite.py', 'test_base.py', 'test_empty.py', + 'test_multifile.py', 'test_quiet.py', 'test_stdin.py', 'test_write_error.py', @@ -275,6 +276,13 @@ if is_variable('serd_pipe') env: test_env, suite: input_suite, ) + test( + 'multifile', + files('test_multifile.py'), + args: pipe_script_args + [meson.current_source_dir() / 'multifile'], + env: test_env, + suite: input_suite, + ) # Output diff --git a/test/multifile/input1.ttl b/test/multifile/input1.ttl new file mode 100644 index 00000000..88c3f8e9 --- /dev/null +++ b/test/multifile/input1.ttl @@ -0,0 +1,2 @@ +[] + a <http://example.org/Type> . diff --git a/test/multifile/input2.trig b/test/multifile/input2.trig new file mode 100644 index 00000000..260080a8 --- /dev/null +++ b/test/multifile/input2.trig @@ -0,0 +1,7 @@ +[] + a <http://example.org/Type> . + +<http://example.org/graph> { + [] + a <http://example.org/OtherType> . +} diff --git a/test/multifile/output.nq b/test/multifile/output.nq new file mode 100644 index 00000000..dd35dc4d --- /dev/null +++ b/test/multifile/output.nq @@ -0,0 +1,3 @@ +_:f0b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Type> . +_:f1b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Type> . +_:f1b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherType> <http://example.org/graph> . diff --git a/test/test_multifile.py b/test/test_multifile.py new file mode 100755 index 00000000..e8fa0775 --- /dev/null +++ b/test/test_multifile.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +# Copyright 2019-2023 David Robillard <d@drobilla.net> +# SPDX-License-Identifier: ISC + +"""Test reading from several input files.""" + +# pylint: disable=duplicate-code + +import os +import shlex +import subprocess +import tempfile + +import serd_test_util as util + +args = util.wrapper_args(__doc__, True) +testdir = args.input +in1_path = os.path.join(testdir, "input1.ttl") +in2_path = os.path.join(testdir, "input2.trig") +check_path = os.path.join(testdir, "output.nq") +command = shlex.split(args.wrapper) + [args.tool, in1_path, in2_path] + + +with tempfile.TemporaryFile(mode="w+", encoding="utf-8") as out: + proc = subprocess.run(command, check=False, stdout=out) + + assert proc.returncode == 0 + + out.seek(0) + with open(check_path, "r", encoding="utf-8") as check: + assert util.lines_equal(list(check), list(out), check_path, "output") diff --git a/tools/console.c b/tools/console.c index 4a127c3e..f6a15ecb 100644 --- a/tools/console.c +++ b/tools/console.c @@ -13,6 +13,9 @@ # include <io.h> #endif +#include <stdint.h> +#include <string.h> + void serd_set_stream_utf8_mode(FILE* const stream) { @@ -39,3 +42,34 @@ serd_print_version(const char* const program) return 0; } + +/// Wrapper for getc that is compatible with SerdReadFunc but faster than fread +static size_t +serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) +{ + (void)size; + (void)nmemb; + + const int c = getc((FILE*)stream); + if (c == EOF) { + *((uint8_t*)buf) = 0; + return 0; + } + + *((uint8_t*)buf) = (uint8_t)c; + return 1; +} + +SerdInputStream +serd_open_tool_input(const char* const filename) +{ + if (!strcmp(filename, "-")) { + const SerdInputStream in = serd_open_input_stream( + serd_file_read_byte, (SerdErrorFunc)ferror, NULL, stdin); + + serd_set_stream_utf8_mode(stdin); + return in; + } + + return serd_open_input_file(filename); +} diff --git a/tools/console.h b/tools/console.h index 41ab328d..ef0fb1d1 100644 --- a/tools/console.h +++ b/tools/console.h @@ -4,6 +4,8 @@ #ifndef SERD_TOOLS_CONSOLE_H #define SERD_TOOLS_CONSOLE_H +#include "serd/input_stream.h" + #include <stdio.h> void @@ -12,4 +14,7 @@ serd_set_stream_utf8_mode(FILE* stream); int serd_print_version(const char* program); +SerdInputStream +serd_open_tool_input(const char* filename); + #endif // SERD_TOOLS_CONSOLE_H diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c index 7cc63a30..894a422b 100644 --- a/tools/serd-pipe.c +++ b/tools/serd-pipe.c @@ -9,13 +9,17 @@ #include "serd/node.h" #include "serd/output_stream.h" #include "serd/reader.h" +#include "serd/sink.h" #include "serd/status.h" #include "serd/stream.h" #include "serd/string_view.h" #include "serd/syntax.h" #include "serd/world.h" #include "serd/writer.h" +#include "zix/allocator.h" +#include "zix/filesystem.h" +#include <errno.h> #include <limits.h> #include <stdbool.h> #include <stdio.h> @@ -26,6 +30,7 @@ #define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serd-pipe: " fmt, __VA_ARGS__) #define MAX_DEPTH 128U +#define SERD_PAGE_SIZE 4096U static int print_usage(const char* const name, const bool error) @@ -53,7 +58,7 @@ print_usage(const char* const name, const bool error) FILE* const os = error ? stderr : stdout; fprintf(os, "%s", error ? "\n" : ""); - fprintf(os, "Usage: %s [OPTION]... INPUT\n", name); + fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); fprintf(os, "%s", description); return error ? 1 : 0; } @@ -73,6 +78,46 @@ quiet_error_func(void* const handle, const SerdError* const e) return SERD_SUCCESS; } +static SerdStatus +read_file(SerdWorld* const world, + SerdSyntax syntax, + const SerdReaderFlags flags, + const SerdSink* const sink, + const size_t stack_size, + const char* const filename, + const char* const add_prefix, + const bool bulk_read) +{ + syntax = syntax ? syntax : serd_guess_syntax(filename); + syntax = syntax ? syntax : SERD_TRIG; + + SerdInputStream in = serd_open_tool_input(filename); + if (!in.stream) { + SERDI_ERRORF( + "failed to open input file `%s' (%s)\n", filename, strerror(errno)); + + return SERD_BAD_STREAM; + } + + SerdLimits limits = serd_world_limits(world); + limits.reader_stack_size = stack_size; + serd_world_set_limits(world, limits); + + SerdReader* reader = serd_reader_new(world, syntax, flags, sink); + + serd_reader_add_blank_prefix(reader, add_prefix); + + SerdStatus st = + serd_reader_start(reader, &in, NULL, bulk_read ? SERD_PAGE_SIZE : 1U); + + st = st ? st : serd_reader_read_document(reader); + + serd_reader_free(reader); + serd_close_input(&in); + + return st; +} + int main(int argc, char** argv) { @@ -83,20 +128,18 @@ main(int argc, char** argv) SerdSyntax output_syntax = SERD_SYNTAX_EMPTY; SerdReaderFlags reader_flags = 0; SerdWriterFlags writer_flags = 0; - bool from_stdin = false; bool bulk_read = true; bool bulk_write = false; bool osyntax_set = false; bool quiet = false; size_t stack_size = 1048576U; const char* input_string = NULL; - const char* add_prefix = NULL; + const char* add_prefix = ""; const char* chop_prefix = NULL; const char* root_uri = NULL; int a = 1; for (; a < argc && argv[a][0] == '-'; ++a) { if (argv[a][1] == '\0') { - from_stdin = true; break; } @@ -214,24 +257,38 @@ main(int argc, char** argv) serd_set_stream_utf8_mode(stdin); serd_set_stream_utf8_mode(stdout); - const char* input = argv[a++]; + char* const* const inputs = argv + a; + const int n_inputs = argc - a; - if ((!input_syntax && !input) || !(input_syntax = serd_guess_syntax(input))) { - input_syntax = SERD_TRIG; + bool input_has_graphs = serd_syntax_has_graphs(input_syntax); + for (int i = a; i < argc; ++i) { + if (serd_syntax_has_graphs(serd_guess_syntax(argv[i]))) { + input_has_graphs = true; + break; + } } - const bool input_has_graphs = serd_syntax_has_graphs(input_syntax); if (!output_syntax && !osyntax_set) { output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES; } - if (!base && input) { // Use input file URI - base = serd_new_file_uri(serd_string(input), serd_empty_string()); + if (!base && n_inputs == 1 && + (output_syntax == SERD_NQUADS || output_syntax == SERD_NTRIPLES)) { + // Choose base URI from the single input path + char* const input_path = zix_canonical_path(NULL, inputs[0]); + if (!input_path || !(base = serd_new_file_uri(serd_string(input_path), + serd_empty_string()))) { + SERDI_ERRORF("unable to determine base URI from path %s\n", inputs[0]); + } + zix_free(NULL, input_path); } FILE* const out_fd = stdout; SerdWorld* const world = serd_world_new(); - SerdEnv* const env = + const SerdLimits limits = {stack_size, MAX_DEPTH}; + serd_world_set_limits(world, limits); + + SerdEnv* const env = serd_env_new(base ? serd_node_string_view(base) : serd_empty_string()); SerdOutputStream out = serd_open_output_stream((SerdWriteFunc)fwrite, @@ -242,12 +299,6 @@ main(int argc, char** argv) SerdWriter* const writer = serd_writer_new( world, output_syntax, writer_flags, env, &out, bulk_write ? 4096U : 1U); - const SerdLimits limits = {stack_size, MAX_DEPTH}; - serd_world_set_limits(world, limits); - - SerdReader* const reader = serd_reader_new( - world, input_syntax, reader_flags, serd_writer_sink(writer)); - if (quiet) { serd_world_set_error_func(world, quiet_error_func, NULL); } @@ -257,34 +308,70 @@ main(int argc, char** argv) } serd_writer_chop_blank_prefix(writer, chop_prefix); - serd_reader_add_blank_prefix(reader, add_prefix); - SerdStatus st = SERD_SUCCESS; - SerdNode* input_name = NULL; - const char* position = NULL; - SerdInputStream in = {NULL, NULL, NULL, NULL}; - size_t block_size = 1U; + SerdStatus st = SERD_SUCCESS; + SerdNode* input_name = NULL; if (input_string) { - position = input_string; - in = serd_open_input_string(&position); - input_name = serd_new_string(serd_string("string")); - } else if (from_stdin) { - in = serd_open_input_stream( - (SerdReadFunc)fread, (SerdErrorFunc)ferror, (SerdCloseFunc)fclose, stdin); - input_name = serd_new_string(serd_string("stdin")); - } else { - block_size = bulk_read ? 4096U : 1U; - in = serd_open_input_file(input); - input_name = serd_new_string(serd_string(input)); + const char* position = input_string; + SerdInputStream string_in = serd_open_input_string(&position); + + SerdReader* const reader = + serd_reader_new(world, + input_syntax ? input_syntax : SERD_TRIG, + reader_flags, + serd_writer_sink(writer)); + + serd_reader_add_blank_prefix(reader, add_prefix); + + if (!(st = serd_reader_start(reader, &string_in, NULL, 1U))) { + st = serd_reader_read_document(reader); + } + + serd_reader_free(reader); + serd_close_input(&string_in); } - if (!(st = serd_reader_start(reader, &in, input_name, block_size))) { - st = serd_reader_read_document(reader); + size_t prefix_len = 0; + char* prefix = NULL; + if (n_inputs > 1) { + prefix_len = 8 + strlen(add_prefix); + prefix = (char*)calloc(1, prefix_len); } - serd_reader_finish(reader); - serd_reader_free(reader); - serd_writer_finish(writer); + for (int i = 0; !st && i < n_inputs; ++i) { + if (!base && !!strcmp(inputs[i], "-")) { + char* const input_path = zix_canonical_path(NULL, inputs[i]); + if (!input_path) { + SERDI_ERRORF("failed to resolve path %s\n", inputs[i]); + st = SERD_BAD_ARG; + break; + } + + SerdNode* const file_uri = + serd_new_file_uri(serd_string(input_path), serd_empty_string()); + + serd_env_set_base_uri(env, serd_node_string_view(file_uri)); + serd_node_free(file_uri); + zix_free(NULL, input_path); + } + + if (n_inputs > 1) { + snprintf(prefix, prefix_len, "f%d%s", i, add_prefix); + } + + if ((st = read_file(world, + input_syntax, + reader_flags, + serd_writer_sink(writer), + stack_size, + inputs[i], + n_inputs > 1 ? prefix : add_prefix, + bulk_read))) { + break; + } + } + free(prefix); + serd_writer_free(writer); serd_node_free(input_name); serd_env_free(env); |