aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- .gitlab-ci.yml | 4
-rw-r--r-- .reuse/dep5 | 2
-rw-r--r-- doc/man/serd-pipe.1 | 4
-rw-r--r-- test/meson.build | 8
-rw-r--r-- test/multifile/input1.ttl | 2
-rw-r--r-- test/multifile/input2.trig | 7
-rw-r--r-- test/multifile/output.nq | 3
-rwxr-xr-x test/test_multifile.py | 32
-rw-r--r-- tools/console.c | 34
-rw-r--r-- tools/console.h | 5
-rw-r--r-- tools/serd-pipe.c | 165
11 files changed, 222 insertions, 44 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 19937395..cc5e5b29 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -17,11 +17,11 @@ dev:
stage: build
image: lv2plugin/debian-x64
script:
- - meson setup build -Db_coverage=true -Dbuildtype=debug -Dc_std=c11 -Ddocs=enabled -Dlint=true -Dprefix=/ -Dwarning_level=3 -Dwerror=true
+ - meson setup build -Db_coverage=true -Dbuildtype=debug -Dc_std=c11 -Ddocs=enabled -Dprefix=/ -Dwarning_level=3 -Dwerror=true
- ninja -C build test
- ninja -C build coverage-html
- DESTDIR=$(pwd)/build/dest meson install -C build
- - meson configure -Dbuildtype=release -Db_coverage=false -Dlint=false build
+ - meson configure -Dbuildtype=release -Db_coverage=false build
- ninja -C build test
coverage: '/ *lines\.*: \d+\.\d+.*/'
artifacts:
diff --git a/.reuse/dep5 b/.reuse/dep5
index 1c237f2b..26922e03 100644
--- a/.reuse/dep5
+++ b/.reuse/dep5
@@ -8,7 +8,7 @@ Copyright: 2010 World Wide Web Consortium, (MIT, ERCIM, Keio, Beihang) and other
Comment: Standard test suites from the W3C
License: BSD-3-Clause
-Files: test/extra/*
+Files: test/extra/* test/multifile/*
Copyright: 2011-2023 David Robillard <d@drobilla.net>
Comment: Extra test suites for serd
License: BSD-3-Clause OR ISC
diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1
index f53761c8..b8d2fd23 100644
--- a/doc/man/serd-pipe.1
+++ b/doc/man/serd-pipe.1
@@ -17,11 +17,11 @@
.Op Fl p Ar prefix
.Op Fl r Ar root
.Op Fl s Ar string
-.Ar input
+.Op Ar input ...
.Sh DESCRIPTION
.Nm
is a fast command-line utility for streaming and processing RDF data.
-It reads an RDF document and writes the data to stdout,
+It reads one or more RDF documents and writes the data to stdout,
possibly transformed and/or in a different syntax.
By default,
the input syntax is guessed from the file extension,
diff --git a/test/meson.build b/test/meson.build
index fd8586a8..489a6ee5 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -18,6 +18,7 @@ simple_script_paths = [
'run_suite.py',
'test_base.py',
'test_empty.py',
+ 'test_multifile.py',
'test_quiet.py',
'test_stdin.py',
'test_write_error.py',
@@ -275,6 +276,13 @@ if is_variable('serd_pipe')
env: test_env,
suite: input_suite,
)
+ test(
+ 'multifile',
+ files('test_multifile.py'),
+ args: pipe_script_args + [meson.current_source_dir() / 'multifile'],
+ env: test_env,
+ suite: input_suite,
+ )
# Output
diff --git a/test/multifile/input1.ttl b/test/multifile/input1.ttl
new file mode 100644
index 00000000..88c3f8e9
--- /dev/null
+++ b/test/multifile/input1.ttl
@@ -0,0 +1,2 @@
+[]
+ a <http://example.org/Type> .
diff --git a/test/multifile/input2.trig b/test/multifile/input2.trig
new file mode 100644
index 00000000..260080a8
--- /dev/null
+++ b/test/multifile/input2.trig
@@ -0,0 +1,7 @@
+[]
+ a <http://example.org/Type> .
+
+<http://example.org/graph> {
+ []
+ a <http://example.org/OtherType> .
+}
diff --git a/test/multifile/output.nq b/test/multifile/output.nq
new file mode 100644
index 00000000..dd35dc4d
--- /dev/null
+++ b/test/multifile/output.nq
@@ -0,0 +1,3 @@
+_:f0b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Type> .
+_:f1b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Type> .
+_:f1b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherType> <http://example.org/graph> .
diff --git a/test/test_multifile.py b/test/test_multifile.py
new file mode 100755
index 00000000..e8fa0775
--- /dev/null
+++ b/test/test_multifile.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+
+# Copyright 2019-2023 David Robillard <d@drobilla.net>
+# SPDX-License-Identifier: ISC
+
+"""Test reading from several input files."""
+
+# pylint: disable=duplicate-code
+
+import os
+import shlex
+import subprocess
+import tempfile
+
+import serd_test_util as util
+
+# Resolve the fixture paths inside the test directory passed as the input
+opts = util.wrapper_args(__doc__, True)
+suite_dir = opts.input
+input_paths = [
+    os.path.join(suite_dir, name) for name in ("input1.ttl", "input2.trig")
+]
+expected_path = os.path.join(suite_dir, "output.nq")
+
+# Command line: the wrapper split into words, then the tool and both inputs
+argv = shlex.split(opts.wrapper) + [opts.tool] + input_paths
+
+
+with tempfile.TemporaryFile(mode="w+", encoding="utf-8") as actual:
+    result = subprocess.run(argv, check=False, stdout=actual)
+
+    assert result.returncode == 0
+
+    # Rewind the captured output, then check it against the reference file
+    actual.seek(0)
+    with open(expected_path, "r", encoding="utf-8") as expected:
+        assert util.lines_equal(
+            list(expected), list(actual), expected_path, "output"
+        )
diff --git a/tools/console.c b/tools/console.c
index 4a127c3e..f6a15ecb 100644
--- a/tools/console.c
+++ b/tools/console.c
@@ -13,6 +13,9 @@
# include <io.h>
#endif
+#include <stdint.h>
+#include <string.h>
+
void
serd_set_stream_utf8_mode(FILE* const stream)
{
@@ -39,3 +42,34 @@ serd_print_version(const char* const program)
return 0;
}
+
+/// Read one byte from a `FILE*` with getc(), using the SerdReadFunc signature.
+///
+/// The `size` and `nmemb` arguments are ignored: a single byte is requested.
+/// Returns 1 after storing the byte in `buf`, or 0 (with `buf` set to zero)
+/// when getc() returns EOF.
+static size_t
+serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream)
+{
+  (void)size;
+  (void)nmemb;
+
+  uint8_t* const out = (uint8_t*)buf;
+  const int      ch  = getc((FILE*)stream);
+
+  if (ch != EOF) {
+    *out = (uint8_t)ch;
+    return 1;
+  }
+
+  *out = 0;
+  return 0;
+}
+
+/// Open the tool input named by `filename`.
+///
+/// The name "-" selects standard input: it is read through
+/// serd_file_read_byte() with a NULL close function, and stdin is switched to
+/// UTF-8 mode.  Any other name is opened with serd_open_input_file().
+SerdInputStream
+serd_open_tool_input(const char* const filename)
+{
+  if (strcmp(filename, "-") != 0) {
+    return serd_open_input_file(filename);
+  }
+
+  const SerdInputStream stdin_stream = serd_open_input_stream(
+    serd_file_read_byte, (SerdErrorFunc)ferror, NULL, stdin);
+
+  serd_set_stream_utf8_mode(stdin);
+  return stdin_stream;
+}
diff --git a/tools/console.h b/tools/console.h
index 41ab328d..ef0fb1d1 100644
--- a/tools/console.h
+++ b/tools/console.h
@@ -4,6 +4,8 @@
#ifndef SERD_TOOLS_CONSOLE_H
#define SERD_TOOLS_CONSOLE_H
+#include "serd/input_stream.h"
+
#include <stdio.h>
void
@@ -12,4 +14,7 @@ serd_set_stream_utf8_mode(FILE* stream);
int
serd_print_version(const char* program);
+SerdInputStream
+serd_open_tool_input(const char* filename);
+
#endif // SERD_TOOLS_CONSOLE_H
diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c
index 7cc63a30..894a422b 100644
--- a/tools/serd-pipe.c
+++ b/tools/serd-pipe.c
@@ -9,13 +9,17 @@
#include "serd/node.h"
#include "serd/output_stream.h"
#include "serd/reader.h"
+#include "serd/sink.h"
#include "serd/status.h"
#include "serd/stream.h"
#include "serd/string_view.h"
#include "serd/syntax.h"
#include "serd/world.h"
#include "serd/writer.h"
+#include "zix/allocator.h"
+#include "zix/filesystem.h"
+#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
@@ -26,6 +30,7 @@
#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serd-pipe: " fmt, __VA_ARGS__)
#define MAX_DEPTH 128U
+#define SERD_PAGE_SIZE 4096U
static int
print_usage(const char* const name, const bool error)
@@ -53,7 +58,7 @@ print_usage(const char* const name, const bool error)
FILE* const os = error ? stderr : stdout;
fprintf(os, "%s", error ? "\n" : "");
- fprintf(os, "Usage: %s [OPTION]... INPUT\n", name);
+ fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
fprintf(os, "%s", description);
return error ? 1 : 0;
}
@@ -73,6 +78,46 @@ quiet_error_func(void* const handle, const SerdError* const e)
return SERD_SUCCESS;
}
+/**
+   Read one input document and send it to a sink.
+
+   @param world World whose limits are updated and which creates the reader.
+   @param syntax Input syntax.  A zero value means the syntax is guessed from
+   `filename`, falling back to SERD_TRIG if the guess is also zero.
+   @param flags Flags passed to serd_reader_new().
+   @param sink Sink passed to serd_reader_new().
+   @param stack_size Value stored in the world's `reader_stack_size` limit.
+   @param filename Input name passed to serd_open_tool_input().
+   @param add_prefix Prefix passed to serd_reader_add_blank_prefix().
+   @param bulk_read If true, SERD_PAGE_SIZE is passed to serd_reader_start()
+   as the block size, otherwise 1.
+
+   @return The status of starting/reading the document, SERD_BAD_STREAM if
+   the input can not be opened, or SERD_BAD_ARG if no reader can be created.
+*/
+static SerdStatus
+read_file(SerdWorld* const      world,
+          SerdSyntax            syntax,
+          const SerdReaderFlags flags,
+          const SerdSink* const sink,
+          const size_t          stack_size,
+          const char* const     filename,
+          const char* const     add_prefix,
+          const bool            bulk_read)
+{
+  syntax = syntax ? syntax : serd_guess_syntax(filename);
+  syntax = syntax ? syntax : SERD_TRIG;
+
+  SerdInputStream in = serd_open_tool_input(filename);
+  if (!in.stream) {
+    SERDI_ERRORF(
+      "failed to open input file `%s' (%s)\n", filename, strerror(errno));
+
+    return SERD_BAD_STREAM;
+  }
+
+  SerdLimits limits        = serd_world_limits(world);
+  limits.reader_stack_size = stack_size;
+  serd_world_set_limits(world, limits);
+
+  // The stack size comes from the command line, so reader construction is
+  // not guaranteed to succeed: bail out cleanly instead of using NULL below
+  SerdReader* const reader = serd_reader_new(world, syntax, flags, sink);
+  if (!reader) {
+    SERDI_ERRORF("failed to create reader for `%s'\n", filename);
+    serd_close_input(&in);
+    return SERD_BAD_ARG;
+  }
+
+  serd_reader_add_blank_prefix(reader, add_prefix);
+
+  SerdStatus st =
+    serd_reader_start(reader, &in, NULL, bulk_read ? SERD_PAGE_SIZE : 1U);
+
+  st = st ? st : serd_reader_read_document(reader);
+
+  serd_reader_free(reader);
+  serd_close_input(&in);
+
+  return st;
+}
+
+
int
main(int argc, char** argv)
{
@@ -83,20 +128,18 @@ main(int argc, char** argv)
SerdSyntax output_syntax = SERD_SYNTAX_EMPTY;
SerdReaderFlags reader_flags = 0;
SerdWriterFlags writer_flags = 0;
- bool from_stdin = false;
bool bulk_read = true;
bool bulk_write = false;
bool osyntax_set = false;
bool quiet = false;
size_t stack_size = 1048576U;
const char* input_string = NULL;
- const char* add_prefix = NULL;
+ const char* add_prefix = "";
const char* chop_prefix = NULL;
const char* root_uri = NULL;
int a = 1;
for (; a < argc && argv[a][0] == '-'; ++a) {
if (argv[a][1] == '\0') {
- from_stdin = true;
break;
}
@@ -214,24 +257,38 @@ main(int argc, char** argv)
serd_set_stream_utf8_mode(stdin);
serd_set_stream_utf8_mode(stdout);
- const char* input = argv[a++];
+ char* const* const inputs = argv + a;
+ const int n_inputs = argc - a;
- if ((!input_syntax && !input) || !(input_syntax = serd_guess_syntax(input))) {
- input_syntax = SERD_TRIG;
+ bool input_has_graphs = serd_syntax_has_graphs(input_syntax);
+ for (int i = a; i < argc; ++i) {
+ if (serd_syntax_has_graphs(serd_guess_syntax(argv[i]))) {
+ input_has_graphs = true;
+ break;
+ }
}
- const bool input_has_graphs = serd_syntax_has_graphs(input_syntax);
if (!output_syntax && !osyntax_set) {
output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES;
}
- if (!base && input) { // Use input file URI
- base = serd_new_file_uri(serd_string(input), serd_empty_string());
+ if (!base && n_inputs == 1 &&
+ (output_syntax == SERD_NQUADS || output_syntax == SERD_NTRIPLES)) {
+ // Choose base URI from the single input path
+ char* const input_path = zix_canonical_path(NULL, inputs[0]);
+ if (!input_path || !(base = serd_new_file_uri(serd_string(input_path),
+ serd_empty_string()))) {
+ SERDI_ERRORF("unable to determine base URI from path %s\n", inputs[0]);
+ }
+ zix_free(NULL, input_path);
}
FILE* const out_fd = stdout;
SerdWorld* const world = serd_world_new();
- SerdEnv* const env =
+ const SerdLimits limits = {stack_size, MAX_DEPTH};
+ serd_world_set_limits(world, limits);
+
+ SerdEnv* const env =
serd_env_new(base ? serd_node_string_view(base) : serd_empty_string());
SerdOutputStream out = serd_open_output_stream((SerdWriteFunc)fwrite,
@@ -242,12 +299,6 @@ main(int argc, char** argv)
SerdWriter* const writer = serd_writer_new(
world, output_syntax, writer_flags, env, &out, bulk_write ? 4096U : 1U);
- const SerdLimits limits = {stack_size, MAX_DEPTH};
- serd_world_set_limits(world, limits);
-
- SerdReader* const reader = serd_reader_new(
- world, input_syntax, reader_flags, serd_writer_sink(writer));
-
if (quiet) {
serd_world_set_error_func(world, quiet_error_func, NULL);
}
@@ -257,34 +308,70 @@ main(int argc, char** argv)
}
serd_writer_chop_blank_prefix(writer, chop_prefix);
- serd_reader_add_blank_prefix(reader, add_prefix);
- SerdStatus st = SERD_SUCCESS;
- SerdNode* input_name = NULL;
- const char* position = NULL;
- SerdInputStream in = {NULL, NULL, NULL, NULL};
- size_t block_size = 1U;
+ SerdStatus st = SERD_SUCCESS;
+ SerdNode* input_name = NULL;
if (input_string) {
- position = input_string;
- in = serd_open_input_string(&position);
- input_name = serd_new_string(serd_string("string"));
- } else if (from_stdin) {
- in = serd_open_input_stream(
- (SerdReadFunc)fread, (SerdErrorFunc)ferror, (SerdCloseFunc)fclose, stdin);
- input_name = serd_new_string(serd_string("stdin"));
- } else {
- block_size = bulk_read ? 4096U : 1U;
- in = serd_open_input_file(input);
- input_name = serd_new_string(serd_string(input));
+ const char* position = input_string;
+ SerdInputStream string_in = serd_open_input_string(&position);
+
+ SerdReader* const reader =
+ serd_reader_new(world,
+ input_syntax ? input_syntax : SERD_TRIG,
+ reader_flags,
+ serd_writer_sink(writer));
+
+ serd_reader_add_blank_prefix(reader, add_prefix);
+
+ if (!(st = serd_reader_start(reader, &string_in, NULL, 1U))) {
+ st = serd_reader_read_document(reader);
+ }
+
+ serd_reader_free(reader);
+ serd_close_input(&string_in);
}
- if (!(st = serd_reader_start(reader, &in, input_name, block_size))) {
- st = serd_reader_read_document(reader);
+ size_t prefix_len = 0;
+ char* prefix = NULL;
+ if (n_inputs > 1) {
+ prefix_len = 8 + strlen(add_prefix);
+ prefix = (char*)calloc(1, prefix_len);
}
- serd_reader_finish(reader);
- serd_reader_free(reader);
- serd_writer_finish(writer);
+ for (int i = 0; !st && i < n_inputs; ++i) {
+ if (!base && !!strcmp(inputs[i], "-")) {
+ char* const input_path = zix_canonical_path(NULL, inputs[i]);
+ if (!input_path) {
+ SERDI_ERRORF("failed to resolve path %s\n", inputs[i]);
+ st = SERD_BAD_ARG;
+ break;
+ }
+
+ SerdNode* const file_uri =
+ serd_new_file_uri(serd_string(input_path), serd_empty_string());
+
+ serd_env_set_base_uri(env, serd_node_string_view(file_uri));
+ serd_node_free(file_uri);
+ zix_free(NULL, input_path);
+ }
+
+ if (n_inputs > 1) {
+ snprintf(prefix, prefix_len, "f%d%s", i, add_prefix);
+ }
+
+ if ((st = read_file(world,
+ input_syntax,
+ reader_flags,
+ serd_writer_sink(writer),
+ stack_size,
+ inputs[i],
+ n_inputs > 1 ? prefix : add_prefix,
+ bulk_read))) {
+ break;
+ }
+ }
+ free(prefix);
+
serd_writer_free(writer);
serd_node_free(input_name);
serd_env_free(env);