about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2019-05-05 16:12:38 +0200
committer David Robillard <d@drobilla.net> 2022-01-13 23:05:24 -0500
commit b98bd7a32cf4302e0a210dd8558edd3ab2088525 (patch)
tree 5f4960229abe31fab1683341609fe37319a49a91
parent f7d10ea309fb52d09a58b2832fe4a09a120b16aa (diff)
download serd-b98bd7a32cf4302e0a210dd8558edd3ab2088525.tar.gz
serd-b98bd7a32cf4302e0a210dd8558edd3ab2088525.tar.bz2
serd-b98bd7a32cf4302e0a210dd8558edd3ab2088525.zip
Add support for reading multiple files at once
-rw-r--r-- doc/serdi.1 | 4
-rw-r--r-- src/reader.c | 3
-rw-r--r-- src/serdi.c | 138
-rw-r--r-- test/meson.build | 5
-rw-r--r-- test/multifile/input1.ttl | 2
-rw-r--r-- test/multifile/input2.trig | 7
-rw-r--r-- test/multifile/output.nq | 3
-rwxr-xr-x test/test_multifile.py | 52
-rwxr-xr-x test/test_stdin.py | 7
9 files changed, 186 insertions, 35 deletions
diff --git a/doc/serdi.1 b/doc/serdi.1
index b2c94d2c..f9c98492 100644
--- a/doc/serdi.1
+++ b/doc/serdi.1
@@ -16,11 +16,11 @@
.Op Fl r Ar root
.Op Fl s Ar string
.Op Fl w Ar filename
-.Ar input
+.Ar input ...
.Sh DESCRIPTION
.Nm
is a fast command-line utility for streaming and processing RDF data.
-It reads an RDF document and writes the data again,
+It reads one or more RDF documents and writes the data again,
possibly transformed and/or in a different syntax.
By default,
the input syntax is guessed from the file extension,
diff --git a/src/reader.c b/src/reader.c
index 0f720d9b..ed6caafd 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -24,7 +24,6 @@
#include "system.h"
#include "world.h"
-#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@@ -311,8 +310,6 @@ serd_reader_prepare(SerdReader* const reader)
st = skip_bom(reader);
} else if (st == SERD_FAILURE) {
reader->source.eof = true;
- } else {
- r_err(reader, st, "error preparing read: %s\n", strerror(errno));
}
return st;
}
diff --git a/src/serdi.c b/src/serdi.c
index f2da0115..2e04ae5a 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -91,6 +91,42 @@ quiet_error_func(void* const handle, const SerdError* const e)
return SERD_SUCCESS;
}
+static SerdStatus
+read_file(SerdWorld* const world,
+ SerdSyntax syntax,
+ const SerdReaderFlags flags,
+ const SerdSink* const sink,
+ const size_t stack_size,
+ const char* const filename,
+ const char* const add_prefix,
+ const bool bulk_read)
+{
+ syntax = syntax ? syntax : serd_guess_syntax(filename);
+ syntax = syntax ? syntax : SERD_TRIG;
+
+ SerdStatus st = SERD_SUCCESS;
+ SerdReader* reader = serd_reader_new(world, syntax, flags, sink, stack_size);
+
+ serd_reader_add_blank_prefix(reader, add_prefix);
+
+ if (!strcmp(filename, "-")) {
+ SerdNode* name = serd_new_string(SERD_STRING("stdin"));
+
+ st = serd_reader_start_stream(
+ reader, serd_file_read_byte, (SerdStreamErrorFunc)ferror, stdin, name, 1);
+
+ serd_node_free(name);
+ } else {
+ st = serd_reader_start_file(reader, filename, bulk_read);
+ }
+
+ st = st ? st : serd_reader_read_document(reader);
+
+ serd_reader_free(reader);
+
+ return st;
+}
+
int
main(int argc, char** argv)
{
@@ -104,21 +140,19 @@ main(int argc, char** argv)
SerdSyntax output_syntax = SERD_SYNTAX_EMPTY;
SerdReaderFlags reader_flags = 0;
SerdWriterFlags writer_flags = 0;
- bool from_stdin = false;
bool bulk_read = true;
bool bulk_write = false;
bool osyntax_set = false;
bool quiet = false;
size_t stack_size = 4194304;
const char* input_string = NULL;
- const char* add_prefix = NULL;
+ const char* add_prefix = "";
const char* chop_prefix = NULL;
const char* root_uri = NULL;
const char* out_filename = NULL;
int a = 1;
for (; a < argc && argv[a][0] == '-'; ++a) {
if (argv[a][1] == '\0') {
- from_stdin = true;
break;
}
@@ -232,19 +266,30 @@ main(int argc, char** argv)
return 1;
}
- const char* input = argv[a++];
+ char* const* const inputs = argv + a;
+ const int n_inputs = argc - a;
- if ((!input_syntax && !input) || !(input_syntax = serd_guess_syntax(input))) {
- input_syntax = SERD_TRIG;
+ bool input_has_graphs = serd_syntax_has_graphs(input_syntax);
+ for (int i = a; i < argc; ++i) {
+ if (serd_syntax_has_graphs(serd_guess_syntax(argv[i]))) {
+ input_has_graphs = true;
+ break;
+ }
}
- const bool input_has_graphs = serd_syntax_has_graphs(input_syntax);
if (!output_syntax && !osyntax_set) {
output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES;
}
- if (!base && input) { // Use input file URI
- base = serd_new_file_uri(SERD_STRING(input), SERD_EMPTY_STRING());
+ if (!base && n_inputs == 1 &&
+ (output_syntax == SERD_NQUADS || output_syntax == SERD_NTRIPLES)) {
+ // Choose base URI from the single input path
+ char* const input_path = serd_canonical_path(inputs[0]);
+ if (!input_path || !(base = serd_new_file_uri(SERD_STRING(input_path),
+ SERD_EMPTY_STRING()))) {
+ SERDI_ERRORF("unable to determine base URI from path %s\n", inputs[0]);
+ }
+ serd_free(input_path);
}
SerdWorld* const world = serd_world_new();
@@ -272,9 +317,6 @@ main(int argc, char** argv)
SerdWriter* const writer =
serd_writer_new(world, output_syntax, writer_flags, env, byte_sink);
- SerdReader* const reader = serd_reader_new(
- world, input_syntax, reader_flags, serd_writer_sink(writer), stack_size);
-
if (quiet) {
serd_world_set_error_func(world, quiet_error_func, NULL);
}
@@ -286,31 +328,69 @@ main(int argc, char** argv)
}
serd_writer_chop_blank_prefix(writer, chop_prefix);
- serd_reader_add_blank_prefix(reader, add_prefix);
SerdStatus st = SERD_SUCCESS;
SerdNode* input_name = NULL;
if (input_string) {
- input_name = serd_new_string(SERD_STRING("string"));
- st = serd_reader_start_string(reader, input_string, input_name);
- } else if (from_stdin) {
- input_name = serd_new_string(SERD_STRING("stdin"));
- st = serd_reader_start_stream(reader,
- serd_file_read_byte,
- (SerdStreamErrorFunc)ferror,
- stdin,
- input_name,
- 1);
- } else {
- st = serd_reader_start_file(reader, input, bulk_read);
+ SerdReader* const reader =
+ serd_reader_new(world,
+ input_syntax ? input_syntax : SERD_TRIG,
+ reader_flags,
+ serd_writer_sink(writer),
+ stack_size);
+
+ serd_reader_add_blank_prefix(reader, add_prefix);
+
+ SerdNode* name = serd_new_string(SERD_STRING("string"));
+ if (!(st = serd_reader_start_string(reader, input_string, name))) {
+ st = serd_reader_read_document(reader);
+ }
+
+ serd_node_free(name);
+ serd_reader_free(reader);
}
- if (!st) {
- st = serd_reader_read_document(reader);
+ size_t prefix_len = 0;
+ char* prefix = NULL;
+ if (n_inputs > 1) {
+ prefix_len = 8 + strlen(add_prefix);
+ prefix = (char*)calloc(1, prefix_len);
}
- serd_reader_finish(reader);
- serd_reader_free(reader);
+ for (int i = 0; !st && i < n_inputs; ++i) {
+ if (!base && strcmp(inputs[i], "-")) {
+ char* const input_path = serd_canonical_path(inputs[i]);
+ if (!input_path) {
+ SERDI_ERRORF("failed to resolve path %s\n", inputs[i]);
+ st = SERD_ERR_BAD_ARG;
+ break;
+ }
+
+ SerdNode* const file_uri =
+ serd_new_file_uri(SERD_STRING(input_path), SERD_EMPTY_STRING());
+
+ serd_env_set_base_uri(env, serd_node_string_view(file_uri));
+ serd_node_free(file_uri);
+ serd_free(input_path);
+ }
+
+ if (n_inputs > 1) {
+ snprintf(prefix, prefix_len, "f%d%s", i, add_prefix);
+ }
+
+ if ((st = read_file(world,
+ input_syntax,
+ reader_flags,
+ serd_writer_sink(writer),
+ stack_size,
+ inputs[i],
+ n_inputs > 1 ? prefix : add_prefix,
+ bulk_read))) {
+ break;
+ }
+ }
+ free(prefix);
+
serd_writer_free(writer);
serd_node_free(input_name);
serd_env_free(env);
diff --git a/test/meson.build b/test/meson.build
index 043ce052..b6c2ce2f 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -119,6 +119,11 @@ if get_option('utils')
env: test_env,
suite: ['serdi', 'input'])
+ test('multiple', files('test_multifile.py'),
+ args: script_args + [meson.current_source_dir() / 'multifile'],
+ env: test_env,
+ suite: ['serdi', 'input'])
+
test('string', serdi,
args: ['-s', '<foo> a <Bar> .'],
env: test_env,
diff --git a/test/multifile/input1.ttl b/test/multifile/input1.ttl
new file mode 100644
index 00000000..88c3f8e9
--- /dev/null
+++ b/test/multifile/input1.ttl
@@ -0,0 +1,2 @@
+[]
+ a <http://example.org/Type> .
diff --git a/test/multifile/input2.trig b/test/multifile/input2.trig
new file mode 100644
index 00000000..260080a8
--- /dev/null
+++ b/test/multifile/input2.trig
@@ -0,0 +1,7 @@
+[]
+ a <http://example.org/Type> .
+
+<http://example.org/graph> {
+ []
+ a <http://example.org/OtherType> .
+}
diff --git a/test/multifile/output.nq b/test/multifile/output.nq
new file mode 100644
index 00000000..dd35dc4d
--- /dev/null
+++ b/test/multifile/output.nq
@@ -0,0 +1,3 @@
+_:f0b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Type> .
+_:f1b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Type> .
+_:f1b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherType> <http://example.org/graph> .
diff --git a/test/test_multifile.py b/test/test_multifile.py
new file mode 100755
index 00000000..5fb44bc5
--- /dev/null
+++ b/test/test_multifile.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+
+"""Test reading from several input files."""
+
+import argparse
+import difflib
+import os
+import shlex
+import subprocess
+import sys
+import tempfile
+
+parser = argparse.ArgumentParser(description=__doc__)
+
+parser.add_argument("--serdi", default="./serdi", help="path to serdi")
+parser.add_argument("--wrapper", default="", help="executable wrapper")
+parser.add_argument("testdir", help="multifile test directory")
+
+args = parser.parse_args(sys.argv[1:])
+in1_path = os.path.join(args.testdir, "input1.ttl")
+in2_path = os.path.join(args.testdir, "input2.trig")
+check_path = os.path.join(args.testdir, "output.nq")
+command = shlex.split(args.wrapper) + [args.serdi, in1_path, in2_path]
+
+
+def _show_diff(from_lines, to_lines, from_filename, to_filename):
+ same = True
+ for line in difflib.unified_diff(
+ from_lines,
+ to_lines,
+ fromfile=os.path.abspath(from_filename),
+ tofile=os.path.abspath(to_filename),
+ ):
+ sys.stderr.write(line)
+ same = False
+
+ return same
+
+
+with tempfile.TemporaryFile(mode="w+", encoding="utf-8") as out:
+ proc = subprocess.run(command, check=False, stdout=out)
+
+ assert proc.returncode == 0
+
+ out.seek(0)
+ with open(check_path, "r", encoding="utf-8") as check:
+
+ output_matches = _show_diff(
+ check.readlines(), out.readlines(), check_path, "output"
+ )
+
+ assert output_matches
diff --git a/test/test_stdin.py b/test/test_stdin.py
index 84b6a8b2..461f6d50 100755
--- a/test/test_stdin.py
+++ b/test/test_stdin.py
@@ -14,7 +14,12 @@ parser.add_argument("--serdi", default="./serdi", help="path to serdi")
parser.add_argument("--wrapper", default="", help="executable wrapper")
args = parser.parse_args(sys.argv[1:])
-command = shlex.split(args.wrapper) + [args.serdi, "-"]
+command = shlex.split(args.wrapper) + [
+ args.serdi,
+ "-I",
+ "http://example.org",
+ "-",
+]
DOCUMENT = "<{0}s> <{0}p> <{0}o> .".format("http://example.org/")