diff options
Diffstat (limited to 'tools/serdi.c')
-rw-r--r-- | tools/serdi.c | 290 |
1 files changed, 290 insertions, 0 deletions
diff --git a/tools/serdi.c b/tools/serdi.c new file mode 100644 index 00000000..fd7fdb15 --- /dev/null +++ b/tools/serdi.c @@ -0,0 +1,290 @@ +// Copyright 2011-2023 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#include "console.h" + +#include "serd/env.h" +#include "serd/error.h" +#include "serd/input_stream.h" +#include "serd/node.h" +#include "serd/output_stream.h" +#include "serd/reader.h" +#include "serd/status.h" +#include "serd/stream.h" +#include "serd/string_view.h" +#include "serd/syntax.h" +#include "serd/world.h" +#include "serd/writer.h" + +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) +#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__) + +#define MAX_DEPTH 128U + +static int +print_usage(const char* const name, const bool error) +{ + static const char* const description = + "Read and write RDF syntax.\n" + "Use - for INPUT to read from standard input.\n\n" + " -a Write ASCII output.\n" + " -b Write output in blocks for performance.\n" + " -c PREFIX Chop PREFIX from matching blank node IDs.\n" + " -e Eat input one character at a time.\n" + " -f Fast and loose URI pass-through.\n" + " -h Display this help and exit.\n" + " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n" + " -k BYTES Parser stack size.\n" + " -l Lax (non-strict) parsing.\n" + " -o SYNTAX Output syntax: empty/turtle/ntriples/nquads.\n" + " -p PREFIX Add PREFIX to blank node IDs.\n" + " -q Suppress all output except data.\n" + " -r ROOT_URI Keep relative URIs within ROOT_URI.\n" + " -s INPUT Parse INPUT as string (terminates options).\n" + " -t Write terser output without newlines.\n" + " -v Display version information and exit.\n"; + + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... INPUT [BASE_URI]\n", name); + fprintf(os, "%s", description); + return error ? 1 : 0; +} + +static int +missing_arg(const char* const name, const char opt) +{ + SERDI_ERRORF("option requires an argument -- '%c'\n", opt); + return print_usage(name, true); +} + +static SerdStatus +quiet_error_func(void* const handle, const SerdError* const e) +{ + (void)handle; + (void)e; + return SERD_SUCCESS; +} + +int +main(int argc, char** argv) +{ + const char* const prog = argv[0]; + + SerdSyntax input_syntax = SERD_SYNTAX_EMPTY; + SerdSyntax output_syntax = SERD_SYNTAX_EMPTY; + SerdReaderFlags reader_flags = 0; + SerdWriterFlags writer_flags = 0; + bool from_string = false; + bool from_stdin = false; + bool bulk_read = true; + bool bulk_write = false; + bool osyntax_set = false; + bool quiet = false; + size_t stack_size = 1048576U; + const char* add_prefix = NULL; + const char* chop_prefix = NULL; + const char* root_uri = NULL; + int a = 1; + for (; a < argc && !from_string && argv[a][0] == '-'; ++a) { + if (argv[a][1] == '\0') { + from_stdin = true; + break; + } + + if (!strcmp(argv[a], "--help")) { + return print_usage(prog, false); + } + + if (!strcmp(argv[a], "--version")) { + return serd_print_version(argv[0]); + } + + for (int o = 1; argv[a][o]; ++o) { + const char opt = argv[a][o]; + + if (opt == 'a') { + writer_flags |= SERD_WRITE_ASCII; + } else if (opt == 'b') { + bulk_write = true; + } else if (opt == 'e') { + bulk_read = false; + } else if (opt == 'f') { + writer_flags |= (SERD_WRITE_UNQUALIFIED | SERD_WRITE_UNRESOLVED); + } else if (opt == 'h') { + return print_usage(prog, false); + } else if (opt == 'l') { + reader_flags |= SERD_READ_LAX; + writer_flags |= SERD_WRITE_LAX; + } else if (opt == 'q') { + quiet = true; + } else if (opt == 't') { + writer_flags |= SERD_WRITE_TERSE; + } else if (opt == 'v') { + return serd_print_version(argv[0]); + } else if (opt == 's') { + from_string = true; + break; + } else if (opt == 'c') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'c'); + } + + chop_prefix = argv[a]; + break; + } else if (opt == 'i') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'i'); + } + + if (!(input_syntax = serd_syntax_by_name(argv[a]))) { + return print_usage(prog, true); + } + break; + } else if (opt == 'k') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'k'); + } + + char* endptr = NULL; + const long size = strtol(argv[a], &endptr, 10); + if (size <= 0 || size == LONG_MAX || *endptr != '\0') { + SERDI_ERRORF("invalid stack size '%s'\n", argv[a]); + return 1; + } + stack_size = (size_t)size; + break; + } else if (opt == 'o') { + osyntax_set = true; + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'o'); + } + + if (!strcmp(argv[a], "empty")) { + output_syntax = SERD_SYNTAX_EMPTY; + } else if (!(output_syntax = serd_syntax_by_name(argv[a]))) { + return print_usage(argv[0], true); + } + break; + } else if (opt == 'p') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'p'); + } + + add_prefix = argv[a]; + break; + } else if (opt == 'r') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'r'); + } + + root_uri = argv[a]; + break; + } else { + SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); + return print_usage(prog, true); + } + } + } + + if (a == argc) { + SERDI_ERROR("missing input\n"); + return print_usage(prog, true); + } + + serd_set_stream_utf8_mode(stdin); + serd_set_stream_utf8_mode(stdout); + + const char* input = argv[a++]; + + if (!input_syntax && !(input_syntax = serd_guess_syntax(input))) { + input_syntax = SERD_TRIG; + } + + const bool input_has_graphs = serd_syntax_has_graphs(input_syntax); + if (!output_syntax && !osyntax_set) { + output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES; + } + + SerdNode* base = NULL; + if (a < argc) { // Base URI given on command line + base = serd_new_uri(serd_string(argv[a])); + } else if (!from_string && !from_stdin) { // Use input file URI + base = serd_new_file_uri(serd_string(input), serd_empty_string()); + } + + FILE* const out_fd = stdout; + SerdWorld* const world = serd_world_new(); + SerdEnv* const env = + serd_env_new(base ? serd_node_string_view(base) : serd_empty_string()); + + SerdOutputStream out = serd_open_output_stream((SerdWriteFunc)fwrite, + (SerdErrorFunc)ferror, + (SerdCloseFunc)fclose, + out_fd); + + SerdWriter* const writer = serd_writer_new( + world, output_syntax, writer_flags, env, &out, bulk_write ? 4096U : 1U); + + const SerdLimits limits = {stack_size, MAX_DEPTH}; + serd_world_set_limits(world, limits); + + SerdReader* const reader = serd_reader_new( + world, input_syntax, reader_flags, serd_writer_sink(writer)); + + if (quiet) { + serd_world_set_error_func(world, quiet_error_func, NULL); + } + + if (root_uri) { + serd_writer_set_root_uri(writer, serd_string(root_uri)); + } + + serd_writer_chop_blank_prefix(writer, chop_prefix); + serd_reader_add_blank_prefix(reader, add_prefix); + + SerdStatus st = SERD_SUCCESS; + SerdNode* input_name = NULL; + const char* position = NULL; + SerdInputStream in = {NULL, NULL, NULL, NULL}; + size_t block_size = 1U; + if (from_string) { + position = input; + in = serd_open_input_string(&position); + input_name = serd_new_string(serd_string("string")); + } else if (from_stdin) { + in = serd_open_input_stream( + (SerdReadFunc)fread, (SerdErrorFunc)ferror, (SerdCloseFunc)fclose, stdin); + input_name = serd_new_string(serd_string("stdin")); + } else { + block_size = bulk_read ? 4096U : 1U; + in = serd_open_input_file(input); + input_name = serd_new_string(serd_string(input)); + } + + if (!(st = serd_reader_start(reader, &in, input_name, block_size))) { + st = serd_reader_read_document(reader); + } + + serd_reader_finish(reader); + serd_reader_free(reader); + serd_writer_finish(writer); + serd_writer_free(writer); + serd_node_free(input_name); + serd_env_free(env); + serd_node_free(base); + serd_world_free(world); + + if (fclose(stdout)) { + perror("serdi: write error"); + st = SERD_BAD_STREAM; + } + + return (st > SERD_FAILURE) ? 1 : 0; +} |