diff options
-rw-r--r-- | doc/serdi.1 | 4 | ||||
-rw-r--r-- | src/reader.c | 3 | ||||
-rw-r--r-- | src/serdi.c | 138 | ||||
-rw-r--r-- | test/meson.build | 5 | ||||
-rw-r--r-- | test/multifile/input1.ttl | 2 | ||||
-rw-r--r-- | test/multifile/input2.trig | 7 | ||||
-rw-r--r-- | test/multifile/output.nq | 3 | ||||
-rwxr-xr-x | test/test_multifile.py | 52 | ||||
-rwxr-xr-x | test/test_stdin.py | 7 |
9 files changed, 186 insertions, 35 deletions
diff --git a/doc/serdi.1 b/doc/serdi.1 index b2c94d2c..f9c98492 100644 --- a/doc/serdi.1 +++ b/doc/serdi.1 @@ -16,11 +16,11 @@ .Op Fl r Ar root .Op Fl s Ar string .Op Fl w Ar filename -.Ar input +.Ar input ... .Sh DESCRIPTION .Nm is a fast command-line utility for streaming and processing RDF data. -It reads an RDF document and writes the data again, +It reads one or more RDF documents and writes the data again, possibly transformed and/or in a different syntax. By default, the input syntax is guessed from the file extension, diff --git a/src/reader.c b/src/reader.c index 0f720d9b..ed6caafd 100644 --- a/src/reader.c +++ b/src/reader.c @@ -24,7 +24,6 @@ #include "system.h" #include "world.h" -#include <errno.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> @@ -311,8 +310,6 @@ serd_reader_prepare(SerdReader* const reader) st = skip_bom(reader); } else if (st == SERD_FAILURE) { reader->source.eof = true; - } else { - r_err(reader, st, "error preparing read: %s\n", strerror(errno)); } return st; } diff --git a/src/serdi.c b/src/serdi.c index f2da0115..2e04ae5a 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -91,6 +91,42 @@ quiet_error_func(void* const handle, const SerdError* const e) return SERD_SUCCESS; } +static SerdStatus +read_file(SerdWorld* const world, + SerdSyntax syntax, + const SerdReaderFlags flags, + const SerdSink* const sink, + const size_t stack_size, + const char* const filename, + const char* const add_prefix, + const bool bulk_read) +{ + syntax = syntax ? syntax : serd_guess_syntax(filename); + syntax = syntax ? syntax : SERD_TRIG; + + SerdStatus st = SERD_SUCCESS; + SerdReader* reader = serd_reader_new(world, syntax, flags, sink, stack_size); + + serd_reader_add_blank_prefix(reader, add_prefix); + + if (!strcmp(filename, "-")) { + SerdNode* name = serd_new_string(SERD_STRING("stdin")); + + st = serd_reader_start_stream( + reader, serd_file_read_byte, (SerdStreamErrorFunc)ferror, stdin, name, 1); + + serd_node_free(name); + } else { + st = serd_reader_start_file(reader, filename, bulk_read); + } + + st = st ? st : serd_reader_read_document(reader); + + serd_reader_free(reader); + + return st; +} + int main(int argc, char** argv) { @@ -104,21 +140,19 @@ main(int argc, char** argv) SerdSyntax output_syntax = SERD_SYNTAX_EMPTY; SerdReaderFlags reader_flags = 0; SerdWriterFlags writer_flags = 0; - bool from_stdin = false; bool bulk_read = true; bool bulk_write = false; bool osyntax_set = false; bool quiet = false; size_t stack_size = 4194304; const char* input_string = NULL; - const char* add_prefix = NULL; + const char* add_prefix = ""; const char* chop_prefix = NULL; const char* root_uri = NULL; const char* out_filename = NULL; int a = 1; for (; a < argc && argv[a][0] == '-'; ++a) { if (argv[a][1] == '\0') { - from_stdin = true; break; } @@ -232,19 +266,30 @@ main(int argc, char** argv) return 1; } - const char* input = argv[a++]; + char* const* const inputs = argv + a; + const int n_inputs = argc - a; - if ((!input_syntax && !input) || !(input_syntax = serd_guess_syntax(input))) { - input_syntax = SERD_TRIG; + bool input_has_graphs = serd_syntax_has_graphs(input_syntax); + for (int i = a; i < argc; ++i) { + if (serd_syntax_has_graphs(serd_guess_syntax(argv[i]))) { + input_has_graphs = true; + break; + } } - const bool input_has_graphs = serd_syntax_has_graphs(input_syntax); if (!output_syntax && !osyntax_set) { output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES; } - if (!base && input) { // Use input file URI - base = serd_new_file_uri(SERD_STRING(input), SERD_EMPTY_STRING()); + if (!base && n_inputs == 1 && + (output_syntax == SERD_NQUADS || output_syntax == SERD_NTRIPLES)) { + // Choose base URI from the single input path + char* const input_path = serd_canonical_path(inputs[0]); + if (!input_path || !(base = serd_new_file_uri(SERD_STRING(input_path), + SERD_EMPTY_STRING()))) { + SERDI_ERRORF("unable to determine base URI from path %s\n", inputs[0]); + } + serd_free(input_path); } SerdWorld* const world = serd_world_new(); @@ -272,9 +317,6 @@ main(int argc, char** argv) SerdWriter* const writer = serd_writer_new(world, output_syntax, writer_flags, env, byte_sink); - SerdReader* const reader = serd_reader_new( - world, input_syntax, reader_flags, serd_writer_sink(writer), stack_size); - if (quiet) { serd_world_set_error_func(world, quiet_error_func, NULL); } @@ -286,31 +328,69 @@ main(int argc, char** argv) } serd_writer_chop_blank_prefix(writer, chop_prefix); - serd_reader_add_blank_prefix(reader, add_prefix); SerdStatus st = SERD_SUCCESS; SerdNode* input_name = NULL; if (input_string) { - input_name = serd_new_string(SERD_STRING("string")); - st = serd_reader_start_string(reader, input_string, input_name); - } else if (from_stdin) { - input_name = serd_new_string(SERD_STRING("stdin")); - st = serd_reader_start_stream(reader, - serd_file_read_byte, - (SerdStreamErrorFunc)ferror, - stdin, - input_name, - 1); - } else { - st = serd_reader_start_file(reader, input, bulk_read); + SerdReader* const reader = + serd_reader_new(world, + input_syntax ? input_syntax : SERD_TRIG, + reader_flags, + serd_writer_sink(writer), + stack_size); + + serd_reader_add_blank_prefix(reader, add_prefix); + + SerdNode* name = serd_new_string(SERD_STRING("string")); + if (!(st = serd_reader_start_string(reader, input_string, name))) { + st = serd_reader_read_document(reader); + } + + serd_node_free(name); + serd_reader_free(reader); } - if (!st) { - st = serd_reader_read_document(reader); + size_t prefix_len = 0; + char* prefix = NULL; + if (n_inputs > 1) { + prefix_len = 8 + strlen(add_prefix); + prefix = (char*)calloc(1, prefix_len); } - serd_reader_finish(reader); - serd_reader_free(reader); + for (int i = 0; !st && i < n_inputs; ++i) { + if (!base && strcmp(inputs[i], "-")) { + char* const input_path = serd_canonical_path(inputs[i]); + if (!input_path) { + SERDI_ERRORF("failed to resolve path %s\n", inputs[i]); + st = SERD_ERR_BAD_ARG; + break; + } + + SerdNode* const file_uri = + serd_new_file_uri(SERD_STRING(input_path), SERD_EMPTY_STRING()); + + serd_env_set_base_uri(env, serd_node_string_view(file_uri)); + serd_node_free(file_uri); + serd_free(input_path); + } + + if (n_inputs > 1) { + snprintf(prefix, prefix_len, "f%d%s", i, add_prefix); + } + + if ((st = read_file(world, + input_syntax, + reader_flags, + serd_writer_sink(writer), + stack_size, + inputs[i], + n_inputs > 1 ? prefix : add_prefix, + bulk_read))) { + break; + } + } + free(prefix); + serd_writer_free(writer); serd_node_free(input_name); serd_env_free(env); diff --git a/test/meson.build b/test/meson.build index 043ce052..b6c2ce2f 100644 --- a/test/meson.build +++ b/test/meson.build @@ -119,6 +119,11 @@ if get_option('utils') env: test_env, suite: ['serdi', 'input']) + test('multiple', files('test_multifile.py'), + args: script_args + [meson.current_source_dir() / 'multifile'], + env: test_env, + suite: ['serdi', 'input']) + test('string', serdi, args: ['-s', '<foo> a <Bar> .'], env: test_env, diff --git a/test/multifile/input1.ttl b/test/multifile/input1.ttl new file mode 100644 index 00000000..88c3f8e9 --- /dev/null +++ b/test/multifile/input1.ttl @@ -0,0 +1,2 @@ +[] + a <http://example.org/Type> . diff --git a/test/multifile/input2.trig b/test/multifile/input2.trig new file mode 100644 index 00000000..260080a8 --- /dev/null +++ b/test/multifile/input2.trig @@ -0,0 +1,7 @@ +[] + a <http://example.org/Type> . + +<http://example.org/graph> { + [] + a <http://example.org/OtherType> . +} diff --git a/test/multifile/output.nq b/test/multifile/output.nq new file mode 100644 index 00000000..dd35dc4d --- /dev/null +++ b/test/multifile/output.nq @@ -0,0 +1,3 @@ +_:f0b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Type> . +_:f1b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Type> . +_:f1b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/OtherType> <http://example.org/graph> . diff --git a/test/test_multifile.py b/test/test_multifile.py new file mode 100755 index 00000000..5fb44bc5 --- /dev/null +++ b/test/test_multifile.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 + +"""Test reading from several input files.""" + +import argparse +import difflib +import os +import shlex +import subprocess +import sys +import tempfile + +parser = argparse.ArgumentParser(description=__doc__) + +parser.add_argument("--serdi", default="./serdi", help="path to serdi") +parser.add_argument("--wrapper", default="", help="executable wrapper") +parser.add_argument("testdir", help="multifile test directory") + +args = parser.parse_args(sys.argv[1:]) +in1_path = os.path.join(args.testdir, "input1.ttl") +in2_path = os.path.join(args.testdir, "input2.trig") +check_path = os.path.join(args.testdir, "output.nq") +command = shlex.split(args.wrapper) + [args.serdi, in1_path, in2_path] + + +def _show_diff(from_lines, to_lines, from_filename, to_filename): + same = True + for line in difflib.unified_diff( + from_lines, + to_lines, + fromfile=os.path.abspath(from_filename), + tofile=os.path.abspath(to_filename), + ): + sys.stderr.write(line) + same = False + + return same + + +with tempfile.TemporaryFile(mode="w+", encoding="utf-8") as out: + proc = subprocess.run(command, check=False, stdout=out) + + assert proc.returncode == 0 + + out.seek(0) + with open(check_path, "r", encoding="utf-8") as check: + + output_matches = _show_diff( + check.readlines(), out.readlines(), check_path, "output" + ) + + assert output_matches diff --git a/test/test_stdin.py b/test/test_stdin.py index 84b6a8b2..461f6d50 100755 --- a/test/test_stdin.py +++ b/test/test_stdin.py @@ -14,7 +14,12 @@ parser.add_argument("--serdi", default="./serdi", help="path to serdi") parser.add_argument("--wrapper", default="", help="executable wrapper") args = parser.parse_args(sys.argv[1:]) -command = shlex.split(args.wrapper) + [args.serdi, "-"] +command = shlex.split(args.wrapper) + [ + args.serdi, + "-I", + "http://example.org", + "-", +] DOCUMENT = "<{0}s> <{0}p> <{0}o> .".format("http://example.org/") |