aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2019-05-05 16:12:38 +0200
committer David Robillard <d@drobilla.net> 2021-03-08 23:23:06 -0500
commit 4e7e642d0d7b6dfa704f5ae95475854bb8c9b0b2 (patch)
tree 6d04338db96fa2546e7e81168eb8ddbafb904b23
parent 8b353f1c530ae0f8d112648f34ea612c681ba57c (diff)
download serd-4e7e642d0d7b6dfa704f5ae95475854bb8c9b0b2.tar.gz
serd-4e7e642d0d7b6dfa704f5ae95475854bb8c9b0b2.tar.bz2
serd-4e7e642d0d7b6dfa704f5ae95475854bb8c9b0b2.zip
Add support for reading multiple files at once
-rw-r--r-- doc/serdi.1 2
-rw-r--r-- src/serdi.c 127
-rw-r--r-- test/meson.build 4
-rw-r--r-- test/multifile/input1.ttl 2
-rw-r--r-- test/multifile/input2.trig 7
-rw-r--r-- test/multifile/output.nq 3
-rwxr-xr-x test/test_multifile.py 52
-rwxr-xr-x test/test_stdin.py 7
8 files changed, 173 insertions, 31 deletions
diff --git a/doc/serdi.1 b/doc/serdi.1
index c23fa174..c834ce42 100644
--- a/doc/serdi.1
+++ b/doc/serdi.1
@@ -16,7 +16,7 @@
.Op Fl r Ar root
.Op Fl s Ar string
.Op Fl w Ar filename
-.Ar input
+.Ar input ...
.Sh DESCRIPTION
.Nm
is a fast command-line utility for streaming and processing RDF data.
diff --git a/src/serdi.c b/src/serdi.c
index 30933552..e8ef9897 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -90,6 +90,42 @@ quiet_error_func(void* const handle, const SerdError* const e)
return SERD_SUCCESS;
}
+/**
+   Read a single input document and send what is read to `sink`.
+
+   A fresh reader is created for the input and freed before returning.  If
+   `syntax` is unset, it is guessed from `filename`, falling back to TriG.
+   The name "-" selects standard input rather than a file on disk.
+
+   @param add_prefix Passed to serd_reader_add_blank_prefix() for this input.
+   @param bulk_read Passed to serd_reader_start_file() for file inputs.
+   @return The status of starting the input if that failed, otherwise the
+   status of reading the document.
+*/
+static SerdStatus
+read_file(SerdWorld* const      world,
+          SerdSyntax            syntax,
+          const SerdReaderFlags flags,
+          const SerdSink* const sink,
+          const size_t          stack_size,
+          const char*           filename,
+          const char*           add_prefix,
+          bool                  bulk_read)
+{
+  // An explicit syntax wins, then a guess from the name, then TriG
+  if (!syntax) {
+    syntax = serd_guess_syntax(filename);
+  }
+  if (!syntax) {
+    syntax = SERD_TRIG;
+  }
+
+  SerdReader* const reader =
+    serd_reader_new(world, syntax, flags, sink, stack_size);
+
+  serd_reader_add_blank_prefix(reader, add_prefix);
+
+  SerdStatus st = SERD_SUCCESS;
+  if (strcmp(filename, "-") != 0) {
+    st = serd_reader_start_file(reader, filename, bulk_read);
+  } else {
+    // The stream name is only needed while starting, so free it right after
+    SerdNode* const label = serd_new_string(SERD_STATIC_STRING("stdin"));
+
+    st = serd_reader_start_stream(
+      reader, serd_file_read_byte, (SerdStreamErrorFunc)ferror, stdin, label, 1);
+
+    serd_node_free(label);
+  }
+
+  // Only read the document if the input was started successfully
+  if (!st) {
+    st = serd_reader_read_document(reader);
+  }
+
+  serd_reader_free(reader);
+  return st;
+}
+
int
main(int argc, char** argv)
{
@@ -102,21 +138,19 @@ main(int argc, char** argv)
SerdSyntax output_syntax = SERD_SYNTAX_EMPTY;
SerdReaderFlags reader_flags = 0;
SerdWriterFlags writer_flags = 0;
- bool from_stdin = false;
bool bulk_read = true;
bool bulk_write = false;
bool osyntax_set = false;
bool quiet = false;
size_t stack_size = 4194304;
const char* input_string = NULL;
- const char* add_prefix = NULL;
+ const char* add_prefix = "";
const char* chop_prefix = NULL;
const char* root_uri = NULL;
const char* out_filename = NULL;
int a = 1;
for (; a < argc && argv[a][0] == '-'; ++a) {
if (argv[a][1] == '\0') {
- from_stdin = true;
break;
}
@@ -214,19 +248,27 @@ main(int argc, char** argv)
return 1;
}
- const char* input = argv[a++];
+ char* const* const inputs = argv + a;
+ const int n_inputs = argc - a;
- if ((!input_syntax && !input) || !(input_syntax = serd_guess_syntax(input))) {
- input_syntax = SERD_TRIG;
+ bool input_has_graphs = serd_syntax_has_graphs(input_syntax);
+ for (int i = a; i < argc; ++i) {
+ if (serd_syntax_has_graphs(serd_guess_syntax(argv[i]))) {
+ input_has_graphs = true;
+ break;
+ }
}
- const bool input_has_graphs = serd_syntax_has_graphs(input_syntax);
if (!output_syntax && !osyntax_set) {
output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES;
}
- if (!base && input) { // Use input file URI
- base = serd_new_file_uri(SERD_MEASURE_STRING(input), SERD_EMPTY_STRING());
+ if (!base && n_inputs == 1 &&
+ (output_syntax == SERD_NQUADS || output_syntax == SERD_NTRIPLES)) {
+ // Choose base URI from the single input path
+ if (!(base = serd_new_real_file_uri(inputs[0], NULL))) {
+ SERDI_ERRORF("unable to determine base URI from path %s\n", inputs[0]);
+ }
}
SerdWorld* const world = serd_world_new();
@@ -253,9 +295,6 @@ main(int argc, char** argv)
SerdWriter* const writer =
serd_writer_new(world, output_syntax, writer_flags, env, byte_sink);
- SerdReader* const reader = serd_reader_new(
- world, input_syntax, reader_flags, serd_writer_sink(writer), stack_size);
-
if (quiet) {
serd_world_set_error_func(world, quiet_error_func, NULL);
}
@@ -263,32 +302,62 @@ main(int argc, char** argv)
SerdNode* root = serd_new_uri(SERD_MEASURE_STRING(root_uri));
serd_writer_set_root_uri(writer, root);
serd_writer_chop_blank_prefix(writer, chop_prefix);
- serd_reader_add_blank_prefix(reader, add_prefix);
serd_node_free(root);
SerdStatus st = SERD_SUCCESS;
SerdNode* input_name = NULL;
if (input_string) {
- input_name = serd_new_string(SERD_STATIC_STRING("string"));
- st = serd_reader_start_string(reader, input_string, input_name);
- } else if (from_stdin) {
- input_name = serd_new_string(SERD_STATIC_STRING("stdin"));
- st = serd_reader_start_stream(reader,
- serd_file_read_byte,
- (SerdStreamErrorFunc)ferror,
- stdin,
- input_name,
- 1);
- } else {
- st = serd_reader_start_file(reader, input, bulk_read);
+ SerdReader* const reader =
+ serd_reader_new(world,
+ input_syntax ? input_syntax : SERD_TRIG,
+ reader_flags,
+ serd_writer_sink(writer),
+ stack_size);
+
+ serd_reader_add_blank_prefix(reader, add_prefix);
+
+ SerdNode* name = serd_new_string(SERD_STATIC_STRING("string"));
+ if (!(st = serd_reader_start_string(reader, input_string, name))) {
+ st = serd_reader_read_document(reader);
+ }
+
+ serd_node_free(name);
+ serd_reader_free(reader);
}
- if (!st) {
- st = serd_reader_read_document(reader);
+ size_t prefix_len = 0;
+ char* prefix = NULL;
+ if (n_inputs > 1) {
+ prefix_len = 8 + strlen(add_prefix);
+ prefix = (char*)calloc(1, prefix_len);
}
- serd_reader_finish(reader);
- serd_reader_free(reader);
+ for (int i = 0; i < n_inputs; ++i) {
+ if (!base) {
+ SerdNode* file_uri =
+ serd_new_file_uri(SERD_MEASURE_STRING(inputs[i]), SERD_EMPTY_STRING());
+
+ serd_env_set_base_uri(env, serd_node_string_view(file_uri));
+ serd_node_free(file_uri);
+ }
+
+ if (n_inputs > 1) {
+ snprintf(prefix, prefix_len, "f%d%s", i, add_prefix);
+ }
+
+ if ((st = read_file(world,
+ input_syntax,
+ reader_flags,
+ serd_writer_sink(writer),
+ stack_size,
+ inputs[i],
+ n_inputs > 1 ? prefix : add_prefix,
+ bulk_read))) {
+ break;
+ }
+ }
+ free(prefix);
+
serd_writer_free(writer);
serd_node_free(input_name);
serd_env_free(env);
diff --git a/test/meson.build b/test/meson.build
index 6f6d7378..ac1fb205 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -89,6 +89,10 @@ if get_option('utils')
args: script_args,
suite: ['serdi', 'input'])
+ test('multiple', files('test_multifile.py'),
+ args: script_args + [meson.current_source_dir() / 'multifile'],
+ suite: ['serdi', 'input'])
+
test('string', serdi,
args: ['-s', '<foo> a <Bar> .'],
should_fail: true,
diff --git a/test/multifile/input1.ttl b/test/multifile/input1.ttl
new file mode 100644
index 00000000..88c3f8e9
--- /dev/null
+++ b/test/multifile/input1.ttl
@@ -0,0 +1,2 @@
+[]
+ a <http://example.org/Type> .
diff --git a/test/multifile/input2.trig b/test/multifile/input2.trig
new file mode 100644
index 00000000..260080a8
--- /dev/null
+++ b/test/multifile/input2.trig
@@ -0,0 +1,7 @@
+[]
+ a <http://example.org/Type> .
+
+<http://example.org/graph> {
+ []
+ a <http://example.org/OtherType> .
+}
diff --git a/test/multifile/output.nq b/test/multifile/output.nq
new file mode 100644
index 00000000..dd35dc4d
--- /dev/null
+++ b/test/multifile/output.nq
@@ -0,0 +1,3 @@
+_:f0b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Type> .
+_:f1b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Type> .
+_:f1b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherType> <http://example.org/graph> .
diff --git a/test/test_multifile.py b/test/test_multifile.py
new file mode 100755
index 00000000..5fb44bc5
--- /dev/null
+++ b/test/test_multifile.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+
+"""Test reading from several input files."""
+
+import argparse
+import difflib
+import os
+import shlex
+import subprocess
+import sys
+import tempfile
+
+# Command line: optional serdi path and wrapper, then the test directory
+parser = argparse.ArgumentParser(description=__doc__)
+parser.add_argument("--serdi", default="./serdi", help="path to serdi")
+parser.add_argument("--wrapper", default="", help="executable wrapper")
+parser.add_argument("testdir", help="multifile test directory")
+args = parser.parse_args(sys.argv[1:])
+
+# Both inputs and the expected output live in the test directory
+in1_path, in2_path, check_path = (
+    os.path.join(args.testdir, leaf)
+    for leaf in ("input1.ttl", "input2.trig", "output.nq")
+)
+
+# The wrapper (if any) is split like a shell command and put first
+command = [*shlex.split(args.wrapper), args.serdi, in1_path, in2_path]
+
+
+def _show_diff(from_lines, to_lines, from_filename, to_filename):
+    """Write a unified diff between two lists of lines to stderr.
+
+    The filenames are made absolute and used as the diff's file labels.
+    Returns True if no diff output was produced, False otherwise.
+    """
+    diff_lines = difflib.unified_diff(
+        from_lines,
+        to_lines,
+        fromfile=os.path.abspath(from_filename),
+        tofile=os.path.abspath(to_filename),
+    )
+
+    identical = True
+    for diff_line in diff_lines:
+        sys.stderr.write(diff_line)
+        identical = False
+
+    return identical
+
+
+with tempfile.TemporaryFile(mode="w+", encoding="utf-8") as out:
+    # Run serdi on both inputs, capturing its standard output in a temp file
+    proc = subprocess.run(command, check=False, stdout=out)
+    assert proc.returncode == 0
+
+    # Rewind the capture so it can be read back from the start
+    out.seek(0)
+    with open(check_path, "r", encoding="utf-8") as check:
+        expected_lines = check.readlines()
+        actual_lines = out.readlines()
+
+        # Any difference is printed to stderr before the assertion fails
+        output_matches = _show_diff(
+            expected_lines, actual_lines, check_path, "output"
+        )
+        assert output_matches
diff --git a/test/test_stdin.py b/test/test_stdin.py
index 28286e09..9a27fcd9 100755
--- a/test/test_stdin.py
+++ b/test/test_stdin.py
@@ -14,7 +14,12 @@ parser.add_argument("--serdi", default="./serdi", help="path to serdi")
parser.add_argument("--wrapper", default="", help="executable wrapper")
args = parser.parse_args(sys.argv[1:])
-command = shlex.split(args.wrapper) + [args.serdi, "-"]
+command = shlex.split(args.wrapper) + [
+ args.serdi,
+ "-I",
+ "http://example.org",
+ "-",
+]
DOCUMENT = "<{0}s> <{0}p> <{0}o> .".format("http://example.org/")