From 62a515492994a0320b1c45e87b4360adbf1ae9ba Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sun, 8 Aug 2021 14:30:42 -0400 Subject: Move serdi to tools subdirectory This separates the command-line tool code from the library implementation. --- meson.build | 19 +- meson_options.txt | 4 +- src/console.c | 122 ------------ src/console.h | 34 ---- src/serdi.c | 546 ------------------------------------------------------ test/meson.build | 2 +- tools/console.c | 122 ++++++++++++ tools/console.h | 34 ++++ tools/meson.build | 13 ++ tools/serdi.c | 546 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 722 insertions(+), 720 deletions(-) delete mode 100644 src/console.c delete mode 100644 src/console.h delete mode 100644 src/serdi.c create mode 100644 tools/console.c create mode 100644 tools/console.h create mode 100644 tools/meson.build create mode 100644 tools/serdi.c diff --git a/meson.build b/meson.build index 8248f576..bddb7392 100644 --- a/meson.build +++ b/meson.build @@ -196,19 +196,8 @@ pkg.generate( description: 'A lightweight library for working with RDF') # Build serdi command line utility -if get_option('utils') - - tool_link_args = [] - if get_option('static') - tool_link_args += ['-static'] - endif - - serdi = executable('serdi', - ['src/serdi.c', 'src/console.c'], - c_args: c_warnings + platform_args + prog_args, - link_args: tool_link_args, - install: true, - dependencies: serd_dep) +if get_option('tools') + subdir('tools') if not get_option('docs').disabled() install_man('doc/serdi.1') @@ -228,14 +217,14 @@ endif if not meson.is_subproject() and meson.version().version_compare('>=0.53.0') summary('Tests', get_option('tests'), bool_yn: true) - summary('Utilities', get_option('utils'), bool_yn: true) + summary('Tools', get_option('tools'), bool_yn: true) summary('Install prefix', get_option('prefix')) summary('Headers', get_option('prefix') / get_option('includedir')) summary('Libraries', get_option('prefix') / get_option('libdir')) - if get_option('utils') + if get_option('tools') summary('Executables', get_option('prefix') / get_option('bindir')) summary('Man pages', get_option('prefix') / get_option('mandir')) endif diff --git a/meson_options.txt b/meson_options.txt index 5d55e09c..4562d9de 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -7,8 +7,8 @@ option('strict', type: 'boolean', value: false, yield: true, option('tests', type: 'boolean', value: true, yield: true, description: 'Build tests') -option('utils', type: 'boolean', value: true, yield: true, - description: 'Build command line utilities') +option('tools', type: 'boolean', value: true, yield: true, + description: 'Build command line tools') option('static', type: 'boolean', value: false, yield: true, description: 'Statically link executables') diff --git a/src/console.c b/src/console.c deleted file mode 100644 index df1bc2ff..00000000 --- a/src/console.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - Copyright 2011-2021 David Robillard - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#include "console.h" - -#include "serd/serd.h" - -#ifdef _WIN32 -# ifdef _MSC_VER -# define WIN32_LEAN_AND_MEAN 1 -# endif -# include -# include -#endif - -#include -#include - -void -serd_set_stream_utf8_mode(FILE* const stream) -{ -#ifdef _WIN32 - _setmode(_fileno(stream), _O_BINARY); -#else - (void)stream; -#endif -} - -int -serd_print_version(const char* const program) -{ - printf("%s %d.%d.%d \n", - program, - SERD_MAJOR_VERSION, - SERD_MINOR_VERSION, - SERD_MICRO_VERSION); - - printf("Copyright 2011-2022 David Robillard .\n" - "License: \n" - "This is free software; you are free to change and redistribute it.\n" - "There is NO WARRANTY, to the extent permitted by law.\n"); - - return 0; -} - -/// Wrapper for getc that is compatible with SerdReadFunc but faster than fread -static size_t -serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) -{ - (void)size; - (void)nmemb; - - const int c = getc((FILE*)stream); - if (c == EOF) { - *((uint8_t*)buf) = 0; - return 0; - } - *((uint8_t*)buf) = (uint8_t)c; - return 1; -} - -SerdByteSource* -serd_open_input(const char* const filename, const size_t block_size) -{ - SerdByteSource* byte_source = NULL; - if (!strcmp(filename, "-")) { - serd_set_stream_utf8_mode(stdin); - - SerdNode* name = serd_new_string(SERD_STRING("stdin")); - - byte_source = serd_byte_source_new_function( - serd_file_read_byte, (SerdStreamErrorFunc)ferror, NULL, stdin, name, 1); - - serd_node_free(name); - } else { - byte_source = serd_byte_source_new_filename(filename, block_size); - } - - return byte_source; -} - -SerdByteSink* -serd_open_output(const char* const filename, const size_t block_size) -{ - if (!filename || !strcmp(filename, "-")) { - serd_set_stream_utf8_mode(stdout); - return serd_byte_sink_new_function((SerdWriteFunc)fwrite, stdout, 1); - } - - return serd_byte_sink_new_filename(filename, block_size); -} - -SerdStatus -serd_set_base_uri_from_path(SerdEnv* const env, const char* const path) -{ - char* const input_path = serd_canonical_path(path); - if (!input_path) { - return SERD_ERR_BAD_ARG; - } - - SerdNode* const file_uri = - serd_new_file_uri(SERD_STRING(input_path), SERD_EMPTY_STRING()); - - serd_env_set_base_uri(env, serd_node_string_view(file_uri)); - serd_node_free(file_uri); - serd_free(input_path); - - return SERD_SUCCESS; -} diff --git a/src/console.h b/src/console.h deleted file mode 100644 index 31076b24..00000000 --- a/src/console.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - Copyright 2021 David Robillard - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#include "serd/serd.h" - -#include - -void -serd_set_stream_utf8_mode(FILE* stream); - -int -serd_print_version(const char* program); - -SerdByteSource* -serd_open_input(const char* filename, size_t block_size); - -SerdByteSink* -serd_open_output(const char* filename, size_t block_size); - -SerdStatus -serd_set_base_uri_from_path(SerdEnv* env, const char* path); diff --git a/src/serdi.c b/src/serdi.c deleted file mode 100644 index 73a3f05c..00000000 --- a/src/serdi.c +++ /dev/null @@ -1,546 +0,0 @@ -/* - Copyright 2011-2021 David Robillard - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#include "console.h" - -#include "serd/serd.h" - -#include -#include -#include -#include -#include -#include - -#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) -#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__) - -typedef struct { - SerdNode* s; - SerdNode* p; - SerdNode* o; - SerdNode* g; -} FilterPattern; - -static int -print_usage(const char* const name, const bool error) -{ - FILE* const os = error ? stderr : stdout; - fprintf(os, "%s", error ? "\n" : ""); - fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); - fprintf(os, "Read and write RDF syntax.\n"); - fprintf(os, "Use - for INPUT to read from standard input.\n\n"); - fprintf(os, " -C Convert literals to canonical form.\n"); - fprintf(os, " -F PATTERN Filter out statements that match PATTERN.\n"); - fprintf(os, " -G PATTERN Only include statements matching PATTERN.\n"); - fprintf(os, " -I BASE_URI Input base URI.\n"); - fprintf(os, " -a Write ASCII output if possible.\n"); - fprintf(os, " -b BYTES I/O block size.\n"); - fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); - fprintf(os, " -f Fast and loose mode (possibly ugly output).\n"); - fprintf(os, " -h Display this help and exit.\n"); - fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); - fprintf(os, " -k BYTES Parser stack size.\n"); - fprintf(os, " -l Lax (non-strict) parsing.\n"); - fprintf(os, " -m Build a model in memory before writing.\n"); - fprintf(os, " -o SYNTAX Output syntax: empty/turtle/ntriples/nquads.\n"); - fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); - fprintf(os, " -q Suppress all output except data.\n"); - fprintf(os, " -r ROOT_URI Keep relative URIs within ROOT_URI.\n"); - fprintf(os, " -s STRING Parse STRING as input.\n"); - fprintf(os, " -t Write terser output without newlines.\n"); - fprintf(os, " -v Display version information and exit.\n"); - fprintf(os, " -w FILENAME Write output to FILENAME instead of stdout.\n"); - fprintf(os, " -x Support parsing variable nodes like `?x'.\n"); - return error ? 1 : 0; -} - -static int -missing_arg(const char* const name, const char opt) -{ - SERDI_ERRORF("option requires an argument -- '%c'\n", opt); - return print_usage(name, true); -} - -static SerdStatus -on_filter_event(void* const handle, const SerdEvent* const event) -{ - if (event->type == SERD_STATEMENT) { - FilterPattern* const pat = (FilterPattern*)handle; - if (pat->s) { - return SERD_ERR_INVALID; - } - - const SerdStatement* const statement = event->statement.statement; - pat->s = serd_node_copy(serd_statement_subject(statement)); - pat->p = serd_node_copy(serd_statement_predicate(statement)); - pat->o = serd_node_copy(serd_statement_object(statement)); - pat->g = serd_node_copy(serd_statement_graph(statement)); - } - - return SERD_SUCCESS; -} - -static SerdSink* -parse_filter(SerdWorld* const world, - const SerdSink* const sink, - const char* const str, - const bool inclusive) -{ - SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING()); - FilterPattern pat = {NULL, NULL, NULL, NULL}; - SerdSink* in_sink = serd_sink_new(&pat, on_filter_event, NULL); - SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL); - SerdReader* reader = serd_reader_new( - world, SERD_NQUADS, SERD_READ_VARIABLES, env, in_sink, 4096); - - SerdStatus st = serd_reader_start(reader, byte_source); - if (!st) { - st = serd_reader_read_document(reader); - } - - serd_reader_free(reader); - serd_env_free(env); - serd_byte_source_free(byte_source); - serd_sink_free(in_sink); - - if (st) { - return NULL; - } - - SerdSink* filter = - serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g, inclusive); - - serd_node_free(pat.s); - serd_node_free(pat.p); - serd_node_free(pat.o); - serd_node_free(pat.g); - return filter; -} - -static SerdStatus -read_file(SerdWorld* const world, - SerdSyntax syntax, - const SerdReaderFlags flags, - SerdEnv* const env, - const SerdSink* const sink, - const size_t stack_size, - const char* const filename, - const char* const add_prefix, - const size_t block_size) -{ - syntax = syntax ? syntax : serd_guess_syntax(filename); - syntax = syntax ? syntax : SERD_TRIG; - - SerdByteSource* byte_source = serd_open_input(filename, block_size); - - if (!byte_source) { - SERDI_ERRORF( - "failed to open input file `%s' (%s)\n", filename, strerror(errno)); - - return SERD_ERR_UNKNOWN; - } - - SerdReader* reader = - serd_reader_new(world, syntax, flags, env, sink, stack_size); - - serd_reader_add_blank_prefix(reader, add_prefix); - - SerdStatus st = serd_reader_start(reader, byte_source); - - st = st ? st : serd_reader_read_document(reader); - - serd_reader_free(reader); - serd_byte_source_free(byte_source); - - return st; -} - -int -main(int argc, char** argv) -{ - const char* const prog = argv[0]; - if (argc < 2) { - return print_usage(prog, true); - } - - SerdNode* base = NULL; - SerdSyntax input_syntax = SERD_SYNTAX_EMPTY; - SerdSyntax output_syntax = SERD_SYNTAX_EMPTY; - SerdReaderFlags reader_flags = 0; - SerdWriterFlags writer_flags = 0; - bool no_inline = false; - bool osyntax_set = false; - bool use_model = false; - bool canonical = false; - bool quiet = false; - size_t block_size = 4096u; - size_t stack_size = 4194304; - const char* input_string = NULL; - const char* in_pattern = NULL; - const char* out_pattern = NULL; - const char* add_prefix = ""; - const char* chop_prefix = NULL; - const char* root_uri = NULL; - const char* out_filename = NULL; - int a = 1; - for (; a < argc && argv[a][0] == '-'; ++a) { - if (argv[a][1] == '\0') { - break; - } - - for (int o = 1; argv[a][o]; ++o) { - const char opt = argv[a][o]; - - if (opt == 'C') { - canonical = true; - } else if (opt == 'a') { - writer_flags |= SERD_WRITE_ASCII; - } else if (opt == 'f') { - no_inline = true; - writer_flags |= (SERD_WRITE_EXPANDED | SERD_WRITE_VERBATIM); - } else if (opt == 'h') { - return print_usage(prog, false); - } else if (opt == 'l') { - reader_flags |= SERD_READ_LAX; - writer_flags |= SERD_WRITE_LAX; - } else if (argv[a][1] == 'm') { - use_model = true; - } else if (opt == 'q') { - quiet = true; - } else if (opt == 't') { - writer_flags |= SERD_WRITE_TERSE; - } else if (opt == 'v') { - return serd_print_version(argv[0]); - } else if (opt == 'x') { - reader_flags |= SERD_READ_VARIABLES; - } else if (argv[a][1] == 'F') { - if (++a == argc) { - return missing_arg(argv[0], 'F'); - } - - out_pattern = argv[a]; - break; - } else if (argv[a][1] == 'G') { - if (++a == argc) { - return missing_arg(argv[0], 'g'); - } - - in_pattern = argv[a]; - break; - } else if (argv[a][1] == 'I') { - if (++a == argc) { - return missing_arg(prog, 'I'); - } - - base = serd_new_uri(SERD_STRING(argv[a])); - break; - } else if (opt == 'b') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'b'); - } - - char* endptr = NULL; - const long size = strtol(argv[a], &endptr, 10); - if (size < 1 || size == LONG_MAX || *endptr != '\0') { - SERDI_ERRORF("invalid block size `%s'\n", argv[a]); - return 1; - } - block_size = (size_t)size; - break; - } else if (opt == 'c') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'c'); - } - - chop_prefix = argv[a]; - break; - } else if (opt == 'i') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'i'); - } - - if (!(input_syntax = serd_syntax_by_name(argv[a]))) { - return print_usage(prog, true); - } - break; - } else if (opt == 'k') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'k'); - } - - char* endptr = NULL; - const long size = strtol(argv[a], &endptr, 10); - if (size <= 0 || size == LONG_MAX || *endptr != '\0') { - SERDI_ERRORF("invalid stack size `%s'\n", argv[a]); - return 1; - } - stack_size = (size_t)size; - break; - } else if (opt == 'o') { - osyntax_set = true; - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'o'); - } - - if (!strcmp(argv[a], "empty")) { - output_syntax = SERD_SYNTAX_EMPTY; - } else if (!(output_syntax = serd_syntax_by_name(argv[a]))) { - return print_usage(argv[0], true); - } - break; - } else if (opt == 'p') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'p'); - } - - add_prefix = argv[a]; - break; - } else if (opt == 'r') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'r'); - } - - root_uri = argv[a]; - break; - } else if (opt == 's') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 's'); - } - - input_string = argv[a]; - break; - } else if (opt == 'w') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(argv[0], 'w'); - } - - out_filename = argv[a]; - break; - } else { - SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); - return print_usage(prog, true); - } - } - } - - if (in_pattern && out_pattern) { - SERDI_ERROR("only one of -F and -G can be given at once\n"); - return 1; - } - - if (a == argc && !input_string) { - SERDI_ERROR("missing input\n"); - return 1; - } - - char* const* const inputs = argv + a; - const int n_inputs = argc - a; - - bool input_has_graphs = serd_syntax_has_graphs(input_syntax); - for (int i = a; i < argc; ++i) { - if (serd_syntax_has_graphs(serd_guess_syntax(argv[i]))) { - input_has_graphs = true; - break; - } - } - - if (!output_syntax && !osyntax_set) { - output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES; - } - - if (!base && n_inputs == 1 && - (output_syntax == SERD_NQUADS || output_syntax == SERD_NTRIPLES)) { - // Choose base URI from the single input path - char* const input_path = serd_canonical_path(inputs[0]); - if (!input_path || !(base = serd_new_file_uri(SERD_STRING(input_path), - SERD_EMPTY_STRING()))) { - SERDI_ERRORF("unable to determine base URI from path %s\n", inputs[0]); - } - serd_free(input_path); - } - - SerdWorld* const world = serd_world_new(); - SerdEnv* const env = - serd_env_new(base ? serd_node_string_view(base) : SERD_EMPTY_STRING()); - - serd_set_stream_utf8_mode(stdin); - if (!out_filename) { - serd_set_stream_utf8_mode(stdout); - } - - const SerdDescribeFlags describe_flags = - no_inline ? SERD_NO_INLINE_OBJECTS : 0u; - - SerdByteSink* const byte_sink = serd_open_output(out_filename, block_size); - if (!byte_sink) { - perror("serdi: error opening output file"); - return 1; - } - - SerdWriter* const writer = - serd_writer_new(world, output_syntax, writer_flags, env, byte_sink); - - SerdModel* model = NULL; - SerdSink* inserter = NULL; - const SerdSink* out_sink = NULL; - if (use_model) { - const SerdModelFlags flags = (input_has_graphs ? SERD_STORE_GRAPHS : 0u); - - model = serd_model_new(world, SERD_ORDER_SPO, flags); - if (input_has_graphs) { - serd_model_add_index(model, SERD_ORDER_GSPO); - } - - if (!no_inline) { - serd_model_add_index(model, SERD_ORDER_OPS); - if (input_has_graphs) { - serd_model_add_index(model, SERD_ORDER_GOPS); - } - } - - inserter = serd_inserter_new(model, NULL); - out_sink = inserter; - } else { - out_sink = serd_writer_sink(writer); - } - - const SerdSink* sink = out_sink; - - SerdSink* canon = NULL; - if (canonical) { - sink = canon = serd_canon_new(world, out_sink, reader_flags); - } - - SerdSink* filter = NULL; - if (in_pattern) { - if (!(filter = parse_filter(world, sink, in_pattern, true))) { - SERDI_ERROR("error parsing inclusive filter pattern\n"); - return EXIT_FAILURE; - } - - sink = filter; - } else if (out_pattern) { - if (!(filter = parse_filter(world, sink, out_pattern, false))) { - SERDI_ERROR("error parsing exclusive filter pattern\n"); - return EXIT_FAILURE; - } - - sink = filter; - } - - if (quiet) { - serd_set_log_func(world, serd_quiet_log_func, NULL); - } - - if (root_uri) { - serd_writer_set_root_uri(writer, SERD_STRING(root_uri)); - } - - serd_writer_chop_blank_prefix(writer, chop_prefix); - - SerdStatus st = SERD_SUCCESS; - if (input_string) { - SerdByteSource* const byte_source = - serd_byte_source_new_string(input_string, NULL); - - SerdReader* const reader = - serd_reader_new(world, - input_syntax ? input_syntax : SERD_TRIG, - reader_flags, - env, - sink, - stack_size); - - serd_reader_add_blank_prefix(reader, add_prefix); - - if (!(st = serd_reader_start(reader, byte_source))) { - st = serd_reader_read_document(reader); - } - - serd_reader_free(reader); - serd_byte_source_free(byte_source); - } - - size_t prefix_len = 0; - char* prefix = NULL; - if (n_inputs > 1) { - prefix_len = 8 + strlen(add_prefix); - prefix = (char*)calloc(1, prefix_len); - } - - for (int i = 0; !st && i < n_inputs; ++i) { - if (!base && strcmp(inputs[i], "-")) { - if ((st = serd_set_base_uri_from_path(env, inputs[i]))) { - SERDI_ERRORF("failed to set base URI from path %s\n", inputs[i]); - break; - } - } - - if (n_inputs > 1) { - snprintf(prefix, prefix_len, "f%d%s", i, add_prefix); - } - - if ((st = read_file(world, - input_syntax, - reader_flags, - env, - sink, - stack_size, - inputs[i], - n_inputs > 1 ? prefix : add_prefix, - block_size))) { - break; - } - } - free(prefix); - - if (st <= SERD_FAILURE && use_model) { - const SerdSink* writer_sink = serd_writer_sink(writer); - SerdCursor* everything = serd_model_begin_ordered( - model, input_has_graphs ? SERD_ORDER_GSPO : SERD_ORDER_SPO); - - serd_env_write_prefixes(env, writer_sink); - - st = serd_describe_range( - everything, - writer_sink, - describe_flags | - ((output_syntax == SERD_NTRIPLES || output_syntax == SERD_NQUADS) - ? SERD_NO_INLINE_OBJECTS - : 0u)); - - serd_cursor_free(everything); - } - - serd_sink_free(canon); - serd_sink_free(filter); - serd_sink_free(inserter); - serd_model_free(model); - serd_writer_free(writer); - serd_env_free(env); - serd_node_free(base); - serd_world_free(world); - - if (serd_byte_sink_close(byte_sink) || (!out_filename && fclose(stdout))) { - perror("serdi: write error"); - st = SERD_ERR_UNKNOWN; - } - - serd_byte_sink_free(byte_sink); - - return (st > SERD_FAILURE) ? 1 : 0; -} diff --git a/test/meson.build b/test/meson.build index 898062f0..2cf3f32c 100644 --- a/test/meson.build +++ b/test/meson.build @@ -50,7 +50,7 @@ if autoship.found() test('autoship', autoship, args: ['test', serd_src_root], suite: 'data') endif -if get_option('utils') +if is_variable('serdi') if wrapper != '' script_args = ['--wrapper', wrapper, '--serdi', serdi.full_path()] diff --git a/tools/console.c b/tools/console.c new file mode 100644 index 00000000..df1bc2ff --- /dev/null +++ b/tools/console.c @@ -0,0 +1,122 @@ +/* + Copyright 2011-2021 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "console.h" + +#include "serd/serd.h" + +#ifdef _WIN32 +# ifdef _MSC_VER +# define WIN32_LEAN_AND_MEAN 1 +# endif +# include +# include +#endif + +#include +#include + +void +serd_set_stream_utf8_mode(FILE* const stream) +{ +#ifdef _WIN32 + _setmode(_fileno(stream), _O_BINARY); +#else + (void)stream; +#endif +} + +int +serd_print_version(const char* const program) +{ + printf("%s %d.%d.%d \n", + program, + SERD_MAJOR_VERSION, + SERD_MINOR_VERSION, + SERD_MICRO_VERSION); + + printf("Copyright 2011-2022 David Robillard .\n" + "License: \n" + "This is free software; you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n"); + + return 0; +} + +/// Wrapper for getc that is compatible with SerdReadFunc but faster than fread +static size_t +serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) +{ + (void)size; + (void)nmemb; + + const int c = getc((FILE*)stream); + if (c == EOF) { + *((uint8_t*)buf) = 0; + return 0; + } + *((uint8_t*)buf) = (uint8_t)c; + return 1; +} + +SerdByteSource* +serd_open_input(const char* const filename, const size_t block_size) +{ + SerdByteSource* byte_source = NULL; + if (!strcmp(filename, "-")) { + serd_set_stream_utf8_mode(stdin); + + SerdNode* name = serd_new_string(SERD_STRING("stdin")); + + byte_source = serd_byte_source_new_function( + serd_file_read_byte, (SerdStreamErrorFunc)ferror, NULL, stdin, name, 1); + + serd_node_free(name); + } else { + byte_source = serd_byte_source_new_filename(filename, block_size); + } + + return byte_source; +} + +SerdByteSink* +serd_open_output(const char* const filename, const size_t block_size) +{ + if (!filename || !strcmp(filename, "-")) { + serd_set_stream_utf8_mode(stdout); + return serd_byte_sink_new_function((SerdWriteFunc)fwrite, stdout, 1); + } + + return serd_byte_sink_new_filename(filename, block_size); +} + +SerdStatus +serd_set_base_uri_from_path(SerdEnv* const env, const char* const path) +{ + char* const input_path = serd_canonical_path(path); + if (!input_path) { + return SERD_ERR_BAD_ARG; + } + + SerdNode* const file_uri = + serd_new_file_uri(SERD_STRING(input_path), SERD_EMPTY_STRING()); + + serd_env_set_base_uri(env, serd_node_string_view(file_uri)); + serd_node_free(file_uri); + serd_free(input_path); + + return SERD_SUCCESS; +} diff --git a/tools/console.h b/tools/console.h new file mode 100644 index 00000000..31076b24 --- /dev/null +++ b/tools/console.h @@ -0,0 +1,34 @@ +/* + Copyright 2021 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "serd/serd.h" + +#include + +void +serd_set_stream_utf8_mode(FILE* stream); + +int +serd_print_version(const char* program); + +SerdByteSource* +serd_open_input(const char* filename, size_t block_size); + +SerdByteSink* +serd_open_output(const char* filename, size_t block_size); + +SerdStatus +serd_set_base_uri_from_path(SerdEnv* env, const char* path); diff --git a/tools/meson.build b/tools/meson.build new file mode 100644 index 00000000..3054364a --- /dev/null +++ b/tools/meson.build @@ -0,0 +1,13 @@ +tool_c_args = c_warnings + platform_args + prog_args +tool_link_args = [] + +if get_option('static') + tool_link_args += ['-static'] +endif + +serdi = executable('serdi', + ['serdi.c', 'console.c'], + c_args: tool_c_args, + link_args: tool_link_args, + install: true, + dependencies: serd_dep) diff --git a/tools/serdi.c b/tools/serdi.c new file mode 100644 index 00000000..73a3f05c --- /dev/null +++ b/tools/serdi.c @@ -0,0 +1,546 @@ +/* + Copyright 2011-2021 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "console.h" + +#include "serd/serd.h" + +#include +#include +#include +#include +#include +#include + +#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) +#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__) + +typedef struct { + SerdNode* s; + SerdNode* p; + SerdNode* o; + SerdNode* g; +} FilterPattern; + +static int +print_usage(const char* const name, const bool error) +{ + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); + fprintf(os, "Read and write RDF syntax.\n"); + fprintf(os, "Use - for INPUT to read from standard input.\n\n"); + fprintf(os, " -C Convert literals to canonical form.\n"); + fprintf(os, " -F PATTERN Filter out statements that match PATTERN.\n"); + fprintf(os, " -G PATTERN Only include statements matching PATTERN.\n"); + fprintf(os, " -I BASE_URI Input base URI.\n"); + fprintf(os, " -a Write ASCII output if possible.\n"); + fprintf(os, " -b BYTES I/O block size.\n"); + fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); + fprintf(os, " -f Fast and loose mode (possibly ugly output).\n"); + fprintf(os, " -h Display this help and exit.\n"); + fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); + fprintf(os, " -k BYTES Parser stack size.\n"); + fprintf(os, " -l Lax (non-strict) parsing.\n"); + fprintf(os, " -m Build a model in memory before writing.\n"); + fprintf(os, " -o SYNTAX Output syntax: empty/turtle/ntriples/nquads.\n"); + fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); + fprintf(os, " -q Suppress all output except data.\n"); + fprintf(os, " -r ROOT_URI Keep relative URIs within ROOT_URI.\n"); + fprintf(os, " -s STRING Parse STRING as input.\n"); + fprintf(os, " -t Write terser output without newlines.\n"); + fprintf(os, " -v Display version information and exit.\n"); + fprintf(os, " -w FILENAME Write output to FILENAME instead of stdout.\n"); + fprintf(os, " -x Support parsing variable nodes like `?x'.\n"); + return error ? 1 : 0; +} + +static int +missing_arg(const char* const name, const char opt) +{ + SERDI_ERRORF("option requires an argument -- '%c'\n", opt); + return print_usage(name, true); +} + +static SerdStatus +on_filter_event(void* const handle, const SerdEvent* const event) +{ + if (event->type == SERD_STATEMENT) { + FilterPattern* const pat = (FilterPattern*)handle; + if (pat->s) { + return SERD_ERR_INVALID; + } + + const SerdStatement* const statement = event->statement.statement; + pat->s = serd_node_copy(serd_statement_subject(statement)); + pat->p = serd_node_copy(serd_statement_predicate(statement)); + pat->o = serd_node_copy(serd_statement_object(statement)); + pat->g = serd_node_copy(serd_statement_graph(statement)); + } + + return SERD_SUCCESS; +} + +static SerdSink* +parse_filter(SerdWorld* const world, + const SerdSink* const sink, + const char* const str, + const bool inclusive) +{ + SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING()); + FilterPattern pat = {NULL, NULL, NULL, NULL}; + SerdSink* in_sink = serd_sink_new(&pat, on_filter_event, NULL); + SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL); + SerdReader* reader = serd_reader_new( + world, SERD_NQUADS, SERD_READ_VARIABLES, env, in_sink, 4096); + + SerdStatus st = serd_reader_start(reader, byte_source); + if (!st) { + st = serd_reader_read_document(reader); + } + + serd_reader_free(reader); + serd_env_free(env); + serd_byte_source_free(byte_source); + serd_sink_free(in_sink); + + if (st) { + return NULL; + } + + SerdSink* filter = + serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g, inclusive); + + serd_node_free(pat.s); + serd_node_free(pat.p); + serd_node_free(pat.o); + serd_node_free(pat.g); + return filter; +} + +static SerdStatus +read_file(SerdWorld* const world, + SerdSyntax syntax, + const SerdReaderFlags flags, + SerdEnv* const env, + const SerdSink* const sink, + const size_t stack_size, + const char* const filename, + const char* const add_prefix, + const size_t block_size) +{ + syntax = syntax ? syntax : serd_guess_syntax(filename); + syntax = syntax ? syntax : SERD_TRIG; + + SerdByteSource* byte_source = serd_open_input(filename, block_size); + + if (!byte_source) { + SERDI_ERRORF( + "failed to open input file `%s' (%s)\n", filename, strerror(errno)); + + return SERD_ERR_UNKNOWN; + } + + SerdReader* reader = + serd_reader_new(world, syntax, flags, env, sink, stack_size); + + serd_reader_add_blank_prefix(reader, add_prefix); + + SerdStatus st = serd_reader_start(reader, byte_source); + + st = st ? st : serd_reader_read_document(reader); + + serd_reader_free(reader); + serd_byte_source_free(byte_source); + + return st; +} + +int +main(int argc, char** argv) +{ + const char* const prog = argv[0]; + if (argc < 2) { + return print_usage(prog, true); + } + + SerdNode* base = NULL; + SerdSyntax input_syntax = SERD_SYNTAX_EMPTY; + SerdSyntax output_syntax = SERD_SYNTAX_EMPTY; + SerdReaderFlags reader_flags = 0; + SerdWriterFlags writer_flags = 0; + bool no_inline = false; + bool osyntax_set = false; + bool use_model = false; + bool canonical = false; + bool quiet = false; + size_t block_size = 4096u; + size_t stack_size = 4194304; + const char* input_string = NULL; + const char* in_pattern = NULL; + const char* out_pattern = NULL; + const char* add_prefix = ""; + const char* chop_prefix = NULL; + const char* root_uri = NULL; + const char* out_filename = NULL; + int a = 1; + for (; a < argc && argv[a][0] == '-'; ++a) { + if (argv[a][1] == '\0') { + break; + } + + for (int o = 1; argv[a][o]; ++o) { + const char opt = argv[a][o]; + + if (opt == 'C') { + canonical = true; + } else if (opt == 'a') { + writer_flags |= SERD_WRITE_ASCII; + } else if (opt == 'f') { + no_inline = true; + writer_flags |= (SERD_WRITE_EXPANDED | SERD_WRITE_VERBATIM); + } else if (opt == 'h') { + return print_usage(prog, false); + } else if (opt == 'l') { + reader_flags |= SERD_READ_LAX; + writer_flags |= SERD_WRITE_LAX; + } else if (argv[a][1] == 'm') { + use_model = true; + } else if (opt == 'q') { + quiet = true; + } else if (opt == 't') { + writer_flags |= SERD_WRITE_TERSE; + } else if (opt == 'v') { + return serd_print_version(argv[0]); + } else if (opt == 'x') { + reader_flags |= SERD_READ_VARIABLES; + } else if (argv[a][1] == 'F') { + if (++a == argc) { + return missing_arg(argv[0], 'F'); + } + + out_pattern = argv[a]; + break; + } else if (argv[a][1] == 'G') { + if (++a == argc) { + return missing_arg(argv[0], 'g'); + } + + in_pattern = argv[a]; + break; + } else if (argv[a][1] == 'I') { + if (++a == argc) { + return missing_arg(prog, 'I'); + } + + base = serd_new_uri(SERD_STRING(argv[a])); + break; + } else if (opt == 'b') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'b'); + } + + char* endptr = NULL; + const long size = strtol(argv[a], &endptr, 10); + if (size < 1 || size == LONG_MAX || *endptr != '\0') { + SERDI_ERRORF("invalid block size `%s'\n", argv[a]); + return 1; + } + block_size = (size_t)size; + break; + } else if (opt == 'c') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'c'); + } + + chop_prefix = argv[a]; + break; + } else if (opt == 'i') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'i'); + } + + if (!(input_syntax = serd_syntax_by_name(argv[a]))) { + return print_usage(prog, true); + } + break; + } else if (opt == 'k') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'k'); + } + + char* endptr = NULL; + const long size = strtol(argv[a], &endptr, 10); + if (size <= 0 || size == LONG_MAX || *endptr != '\0') { + SERDI_ERRORF("invalid stack size `%s'\n", argv[a]); + return 1; + } + stack_size = (size_t)size; + break; + } else if (opt == 'o') { + osyntax_set = true; + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'o'); + } + + if (!strcmp(argv[a], "empty")) { + output_syntax = SERD_SYNTAX_EMPTY; + } else if (!(output_syntax = serd_syntax_by_name(argv[a]))) { + return print_usage(argv[0], true); + } + break; + } else if (opt == 'p') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'p'); + } + + add_prefix = argv[a]; + break; + } else if (opt == 'r') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'r'); + } + + root_uri = argv[a]; + break; + } else if (opt == 's') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 's'); + } + + input_string = argv[a]; + break; + } else if (opt == 'w') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(argv[0], 'w'); + } + + out_filename = argv[a]; + break; + } else { + SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); + return print_usage(prog, true); + } + } + } + + if (in_pattern && out_pattern) { + SERDI_ERROR("only one of -F and -G can be given at once\n"); + return 1; + } + + if (a == argc && !input_string) { + SERDI_ERROR("missing input\n"); + return 1; + } + + char* const* const inputs = argv + a; + const int n_inputs = argc - a; + + bool input_has_graphs = serd_syntax_has_graphs(input_syntax); + for (int i = a; i < argc; ++i) { + if (serd_syntax_has_graphs(serd_guess_syntax(argv[i]))) { + input_has_graphs = true; + break; + } + } + + if (!output_syntax && !osyntax_set) { + output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES; + } + + if (!base && n_inputs == 1 && + (output_syntax == SERD_NQUADS || output_syntax == SERD_NTRIPLES)) { + // Choose base URI from the single input path + char* const input_path = serd_canonical_path(inputs[0]); + if (!input_path || !(base = serd_new_file_uri(SERD_STRING(input_path), + SERD_EMPTY_STRING()))) { + SERDI_ERRORF("unable to determine base URI from path %s\n", inputs[0]); + } + serd_free(input_path); + } + + SerdWorld* const world = serd_world_new(); + SerdEnv* const env = + serd_env_new(base ? serd_node_string_view(base) : SERD_EMPTY_STRING()); + + serd_set_stream_utf8_mode(stdin); + if (!out_filename) { + serd_set_stream_utf8_mode(stdout); + } + + const SerdDescribeFlags describe_flags = + no_inline ? SERD_NO_INLINE_OBJECTS : 0u; + + SerdByteSink* const byte_sink = serd_open_output(out_filename, block_size); + if (!byte_sink) { + perror("serdi: error opening output file"); + return 1; + } + + SerdWriter* const writer = + serd_writer_new(world, output_syntax, writer_flags, env, byte_sink); + + SerdModel* model = NULL; + SerdSink* inserter = NULL; + const SerdSink* out_sink = NULL; + if (use_model) { + const SerdModelFlags flags = (input_has_graphs ? SERD_STORE_GRAPHS : 0u); + + model = serd_model_new(world, SERD_ORDER_SPO, flags); + if (input_has_graphs) { + serd_model_add_index(model, SERD_ORDER_GSPO); + } + + if (!no_inline) { + serd_model_add_index(model, SERD_ORDER_OPS); + if (input_has_graphs) { + serd_model_add_index(model, SERD_ORDER_GOPS); + } + } + + inserter = serd_inserter_new(model, NULL); + out_sink = inserter; + } else { + out_sink = serd_writer_sink(writer); + } + + const SerdSink* sink = out_sink; + + SerdSink* canon = NULL; + if (canonical) { + sink = canon = serd_canon_new(world, out_sink, reader_flags); + } + + SerdSink* filter = NULL; + if (in_pattern) { + if (!(filter = parse_filter(world, sink, in_pattern, true))) { + SERDI_ERROR("error parsing inclusive filter pattern\n"); + return EXIT_FAILURE; + } + + sink = filter; + } else if (out_pattern) { + if (!(filter = parse_filter(world, sink, out_pattern, false))) { + SERDI_ERROR("error parsing exclusive filter pattern\n"); + return EXIT_FAILURE; + } + + sink = filter; + } + + if (quiet) { + serd_set_log_func(world, serd_quiet_log_func, NULL); + } + + if (root_uri) { + serd_writer_set_root_uri(writer, SERD_STRING(root_uri)); + } + + serd_writer_chop_blank_prefix(writer, chop_prefix); + + SerdStatus st = SERD_SUCCESS; + if (input_string) { + SerdByteSource* const byte_source = + serd_byte_source_new_string(input_string, NULL); + + SerdReader* const reader = + serd_reader_new(world, + input_syntax ? input_syntax : SERD_TRIG, + reader_flags, + env, + sink, + stack_size); + + serd_reader_add_blank_prefix(reader, add_prefix); + + if (!(st = serd_reader_start(reader, byte_source))) { + st = serd_reader_read_document(reader); + } + + serd_reader_free(reader); + serd_byte_source_free(byte_source); + } + + size_t prefix_len = 0; + char* prefix = NULL; + if (n_inputs > 1) { + prefix_len = 8 + strlen(add_prefix); + prefix = (char*)calloc(1, prefix_len); + } + + for (int i = 0; !st && i < n_inputs; ++i) { + if (!base && strcmp(inputs[i], "-")) { + if ((st = serd_set_base_uri_from_path(env, inputs[i]))) { + SERDI_ERRORF("failed to set base URI from path %s\n", inputs[i]); + break; + } + } + + if (n_inputs > 1) { + snprintf(prefix, prefix_len, "f%d%s", i, add_prefix); + } + + if ((st = read_file(world, + input_syntax, + reader_flags, + env, + sink, + stack_size, + inputs[i], + n_inputs > 1 ? prefix : add_prefix, + block_size))) { + break; + } + } + free(prefix); + + if (st <= SERD_FAILURE && use_model) { + const SerdSink* writer_sink = serd_writer_sink(writer); + SerdCursor* everything = serd_model_begin_ordered( + model, input_has_graphs ? SERD_ORDER_GSPO : SERD_ORDER_SPO); + + serd_env_write_prefixes(env, writer_sink); + + st = serd_describe_range( + everything, + writer_sink, + describe_flags | + ((output_syntax == SERD_NTRIPLES || output_syntax == SERD_NQUADS) + ? SERD_NO_INLINE_OBJECTS + : 0u)); + + serd_cursor_free(everything); + } + + serd_sink_free(canon); + serd_sink_free(filter); + serd_sink_free(inserter); + serd_model_free(model); + serd_writer_free(writer); + serd_env_free(env); + serd_node_free(base); + serd_world_free(world); + + if (serd_byte_sink_close(byte_sink) || (!out_filename && fclose(stdout))) { + perror("serdi: write error"); + st = SERD_ERR_UNKNOWN; + } + + serd_byte_sink_free(byte_sink); + + return (st > SERD_FAILURE) ? 1 : 0; +} -- cgit v1.2.1