// Copyright 2011-2023 David Robillard // SPDX-License-Identifier: ISC #include "serd_config.h" #include "string_utils.h" #include "serd/attributes.h" #include "serd/env.h" #include "serd/error.h" #include "serd/node.h" #include "serd/reader.h" #include "serd/sink.h" #include "serd/status.h" #include "serd/string_view.h" #include "serd/syntax.h" #include "serd/uri.h" #include "serd/version.h" #include "serd/writer.h" #ifdef _WIN32 # ifdef _MSC_VER # define WIN32_LEAN_AND_MEAN # endif # include # include #endif #if USE_POSIX_FADVISE && USE_FILENO # include #endif #include #include #include #include #include #define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) #define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__) typedef struct { SerdSyntax syntax; const char* name; const char* extension; } Syntax; static const Syntax syntaxes[] = {{SERD_TURTLE, "turtle", ".ttl"}, {SERD_NTRIPLES, "ntriples", ".nt"}, {SERD_NQUADS, "nquads", ".nq"}, {SERD_TRIG, "trig", ".trig"}, {(SerdSyntax)0, NULL, NULL}}; static SerdSyntax get_syntax(const char* const name) { for (const Syntax* s = syntaxes; s->name; ++s) { if (!serd_strncasecmp(s->name, name, strlen(name))) { return s->syntax; } } SERDI_ERRORF("unknown syntax '%s'\n", name); return (SerdSyntax)0; } static SERD_PURE_FUNC SerdSyntax guess_syntax(const char* const filename) { const char* ext = strrchr(filename, '.'); if (ext) { for (const Syntax* s = syntaxes; s->name; ++s) { if (!serd_strncasecmp(s->extension, ext, strlen(ext))) { return s->syntax; } } } return (SerdSyntax)0; } static int print_version(void) { printf("serdi %d.%d.%d \n", SERD_MAJOR_VERSION, SERD_MINOR_VERSION, SERD_MICRO_VERSION); printf("Copyright 2011-2023 David Robillard .\n" "License ISC: .\n" "This is free software; you are free to change and redistribute it." "\nThere is NO WARRANTY, to the extent permitted by law.\n"); return 0; } static int print_usage(const char* const name, const bool error) { static const char* const description = "Read and write RDF syntax.\n" "Use - for INPUT to read from standard input.\n\n" " -a Write ASCII output.\n" " -b Write output in blocks for performance.\n" " -c PREFIX Chop PREFIX from matching blank node IDs.\n" " -e Eat input one character at a time.\n" " -f Fast and loose URI pass-through.\n" " -h Display this help and exit.\n" " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n" " -l Lax (non-strict) parsing.\n" " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n" " -p PREFIX Add PREFIX to blank node IDs.\n" " -q Suppress all output except data.\n" " -r ROOT_URI Keep relative URIs within ROOT_URI.\n" " -s INPUT Parse INPUT as string (terminates options).\n" " -v Display version information and exit.\n"; FILE* const os = error ? stderr : stdout; fprintf(os, "%s", error ? "\n" : ""); fprintf(os, "Usage: %s [OPTION]... INPUT [BASE_URI]\n", name); fprintf(os, "%s", description); return error ? 1 : 0; } static int missing_arg(const char* const name, const char opt) { SERDI_ERRORF("option requires an argument -- '%c'\n", opt); return print_usage(name, true); } static SerdStatus quiet_error_sink(void* const handle, const SerdError* const e) { (void)handle; (void)e; return SERD_SUCCESS; } static FILE* serd_fopen(const char* const path, const char* const mode) { FILE* fd = fopen(path, mode); if (!fd) { SERDI_ERRORF("failed to open file %s (%s)\n", path, strerror(errno)); return NULL; } #if USE_POSIX_FADVISE && USE_FILENO (void)posix_fadvise( fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL | POSIX_FADV_NOREUSE); #endif return fd; } static SerdWriterFlags choose_style(const SerdSyntax input_syntax, const SerdSyntax output_syntax, const bool ascii, const bool bulk_write, const bool full_uris, const bool lax) { SerdWriterFlags writer_flags = 0U; if (output_syntax == SERD_NTRIPLES || ascii) { writer_flags |= SERD_WRITE_ASCII; } else if (output_syntax == SERD_TURTLE) { writer_flags |= SERD_WRITE_ABBREVIATED; if (!full_uris) { writer_flags |= SERD_WRITE_CURIED; } } if ((input_syntax == SERD_TURTLE || input_syntax == SERD_TRIG) || (writer_flags & SERD_WRITE_CURIED)) { // Base URI may change and/or we're abbreviating URIs, so must resolve writer_flags |= SERD_WRITE_RESOLVED; } if (bulk_write) { writer_flags |= SERD_WRITE_BULK; } if (!lax) { writer_flags |= SERD_WRITE_STRICT; } return writer_flags; } int main(int argc, char** argv) { const char* const prog = argv[0]; FILE* in_fd = NULL; SerdSyntax input_syntax = (SerdSyntax)0; SerdSyntax output_syntax = (SerdSyntax)0; bool from_file = true; bool ascii = false; bool bulk_read = true; bool bulk_write = false; bool full_uris = false; bool lax = false; bool quiet = false; const char* in_name = NULL; const char* add_prefix = NULL; const char* chop_prefix = NULL; const char* root_uri = NULL; int a = 1; for (; a < argc && from_file && argv[a][0] == '-'; ++a) { if (argv[a][1] == '\0') { in_name = (const char*)"(stdin)"; in_fd = stdin; break; } if (!strcmp(argv[a], "--help")) { return print_usage(prog, false); } if (!strcmp(argv[a], "--version")) { return print_version(); } for (int o = 1; argv[a][o]; ++o) { const char opt = argv[a][o]; if (opt == 'a') { ascii = true; } else if (opt == 'b') { bulk_write = true; } else if (opt == 'e') { bulk_read = false; } else if (opt == 'f') { full_uris = true; } else if (opt == 'h') { return print_usage(prog, false); } else if (opt == 'l') { lax = true; } else if (opt == 'q') { quiet = true; } else if (opt == 'v') { return print_version(); } else if (opt == 's') { in_name = "(string)"; from_file = false; break; } else if (opt == 'c') { if (argv[a][o + 1] || ++a == argc) { return missing_arg(prog, 'c'); } chop_prefix = argv[a]; break; } else if (opt == 'i') { if (argv[a][o + 1] || ++a == argc) { return missing_arg(prog, 'i'); } if (!(input_syntax = get_syntax(argv[a]))) { return print_usage(prog, true); } break; } else if (opt == 'o') { if (argv[a][o + 1] || ++a == argc) { return missing_arg(prog, 'o'); } if (!(output_syntax = get_syntax(argv[a]))) { return print_usage(prog, true); } break; } else if (opt == 'p') { if (argv[a][o + 1] || ++a == argc) { return missing_arg(prog, 'p'); } add_prefix = argv[a]; break; } else if (opt == 'r') { if (argv[a][o + 1] || ++a == argc) { return missing_arg(prog, 'r'); } root_uri = argv[a]; break; } else { SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); return print_usage(prog, true); } } } if (a == argc) { SERDI_ERROR("missing input\n"); return print_usage(prog, true); } #ifdef _WIN32 _setmode(_fileno(stdin), _O_BINARY); _setmode(_fileno(stdout), _O_BINARY); #endif char* input_path = NULL; const char* input = (const char*)argv[a++]; if (from_file) { in_name = in_name ? in_name : input; if (!in_fd) { if (!strncmp(input, "file:", 5)) { input_path = serd_parse_file_uri(input, NULL); input = input_path; } if (!input || !(in_fd = serd_fopen(input, "rb"))) { return 1; } } } if (!input_syntax && !(input_syntax = guess_syntax(in_name))) { input_syntax = SERD_TRIG; } if (!output_syntax) { output_syntax = ((input_syntax == SERD_TURTLE || input_syntax == SERD_NTRIPLES) ? SERD_NTRIPLES : SERD_NQUADS); } const SerdWriterFlags writer_flags = choose_style( input_syntax, output_syntax, ascii, bulk_write, full_uris, lax); SerdNode* base = NULL; if (a < argc) { // Base URI given on command line base = serd_new_uri(serd_string((const char*)argv[a])); } else if (from_file && in_fd != stdin) { // Use input file URI base = serd_new_file_uri(serd_string(input), serd_empty_string()); } FILE* const out_fd = stdout; SerdEnv* const env = serd_env_new(base ? serd_node_string_view(base) : serd_empty_string()); SerdWriter* const writer = serd_writer_new(output_syntax, writer_flags, env, serd_file_sink, out_fd); SerdReader* const reader = serd_reader_new(input_syntax, writer, NULL, (SerdBaseFunc)serd_writer_set_base_uri, (SerdPrefixFunc)serd_writer_set_prefix, (SerdStatementFunc)serd_writer_write_statement, (SerdEndFunc)serd_writer_end_anon); serd_reader_set_strict(reader, !lax); if (quiet) { serd_reader_set_error_sink(reader, quiet_error_sink, NULL); serd_writer_set_error_sink(writer, quiet_error_sink, NULL); } if (root_uri) { SerdNode* const root = serd_new_uri(serd_string(root_uri)); serd_writer_set_root_uri(writer, root); serd_node_free(root); } serd_writer_chop_blank_prefix(writer, chop_prefix); serd_reader_add_blank_prefix(reader, add_prefix); SerdStatus st = SERD_SUCCESS; if (!from_file) { st = serd_reader_read_string(reader, input); } else if (bulk_read) { st = serd_reader_read_file_handle(reader, in_fd, in_name); } else { st = serd_reader_start_stream(reader, in_fd, in_name, false); while (!st) { st = serd_reader_read_chunk(reader); } serd_reader_end_stream(reader); } serd_reader_free(reader); serd_writer_finish(writer); serd_writer_free(writer); serd_env_free(env); serd_node_free(base); free(input_path); if (from_file) { fclose(in_fd); } if (fclose(out_fd)) { perror("serdi: write error"); st = SERD_BAD_STREAM; } return (st > SERD_FAILURE) ? 1 : 0; }