diff options
Diffstat (limited to 'src/serdi.c')
-rw-r--r-- | src/serdi.c | 552 |
1 files changed, 277 insertions, 275 deletions
diff --git a/src/serdi.c b/src/serdi.c index 69304cad..44644ff4 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -22,13 +22,13 @@ #include "serd/serd.h" #ifdef _WIN32 -#define WIN32_LEAN_AND_MEAN 1 -#include <fcntl.h> -#include <io.h> +# define WIN32_LEAN_AND_MEAN 1 +# include <fcntl.h> +# include <io.h> #endif #if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO) -#include <fcntl.h> +# include <fcntl.h> #endif #include <errno.h> @@ -38,114 +38,114 @@ #include <stdlib.h> #include <string.h> -#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) +#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) #define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__) typedef struct { - SerdSyntax syntax; - const char* name; - const char* extension; + SerdSyntax syntax; + const char* name; + const char* extension; } Syntax; -static const Syntax syntaxes[] = { - {SERD_TURTLE, "turtle", ".ttl"}, - {SERD_NTRIPLES, "ntriples", ".nt"}, - {SERD_NQUADS, "nquads", ".nq"}, - {SERD_TRIG, "trig", ".trig"}, - {(SerdSyntax)0, NULL, NULL} -}; +static const Syntax syntaxes[] = {{SERD_TURTLE, "turtle", ".ttl"}, + {SERD_NTRIPLES, "ntriples", ".nt"}, + {SERD_NQUADS, "nquads", ".nq"}, + {SERD_TRIG, "trig", ".trig"}, + {(SerdSyntax)0, NULL, NULL}}; static SerdSyntax get_syntax(const char* name) { - for (const Syntax* s = syntaxes; s->name; ++s) { - if (!serd_strncasecmp(s->name, name, strlen(name))) { - return s->syntax; - } - } - SERDI_ERRORF("unknown syntax `%s'\n", name); - return (SerdSyntax)0; + for (const Syntax* s = syntaxes; s->name; ++s) { + if (!serd_strncasecmp(s->name, name, strlen(name))) { + return s->syntax; + } + } + + SERDI_ERRORF("unknown syntax `%s'\n", name); + return (SerdSyntax)0; } static SERD_PURE_FUNC SerdSyntax guess_syntax(const char* filename) { - const char* ext = strrchr(filename, '.'); - if (ext) { - for (const Syntax* s = syntaxes; s->name; ++s) { - if (!serd_strncasecmp(s->extension, ext, strlen(ext))) { - return s->syntax; - } - } - } - return (SerdSyntax)0; + const char* ext = strrchr(filename, '.'); + if (ext) { + for (const Syntax* s = syntaxes; s->name; ++s) { + if (!serd_strncasecmp(s->extension, ext, strlen(ext))) { + return s->syntax; + } + } + } + + return (SerdSyntax)0; } static int print_version(void) { - printf("serdi " SERD_VERSION " <http://drobilla.net/software/serd>\n"); - printf("Copyright 2011-2020 David Robillard <http://drobilla.net>.\n" - "License: <http://www.opensource.org/licenses/isc>\n" - "This is free software; you are free to change and redistribute it." - "\nThere is NO WARRANTY, to the extent permitted by law.\n"); - return 0; + printf("serdi " SERD_VERSION " <http://drobilla.net/software/serd>\n"); + printf("Copyright 2011-2020 David Robillard <http://drobilla.net>.\n" + "License: <http://www.opensource.org/licenses/isc>\n" + "This is free software; you are free to change and redistribute it." + "\nThere is NO WARRANTY, to the extent permitted by law.\n"); + return 0; } static int print_usage(const char* name, bool error) { - FILE* const os = error ? stderr : stdout; - fprintf(os, "%s", error ? "\n" : ""); - fprintf(os, "Usage: %s [OPTION]... INPUT [BASE_URI]\n", name); - fprintf(os, "Read and write RDF syntax.\n"); - fprintf(os, "Use - for INPUT to read from standard input.\n\n"); - fprintf(os, " -a Write ASCII output if possible.\n"); - fprintf(os, " -b Fast bulk output for large serialisations.\n"); - fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); - fprintf(os, " -e Eat input one character at a time.\n"); - fprintf(os, " -f Keep full URIs in input (don't qualify).\n"); - fprintf(os, " -h Display this help and exit.\n"); - fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); - fprintf(os, " -l Lax (non-strict) parsing.\n"); - fprintf(os, " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"); - fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); - fprintf(os, " -q Suppress all output except data.\n"); - fprintf(os, " -r ROOT_URI Keep relative URIs within ROOT_URI.\n"); - fprintf(os, " -s INPUT Parse INPUT as string (terminates options).\n"); - fprintf(os, " -v Display version information and exit.\n"); - return error ? 1 : 0; + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... INPUT [BASE_URI]\n", name); + fprintf(os, "Read and write RDF syntax.\n"); + fprintf(os, "Use - for INPUT to read from standard input.\n\n"); + fprintf(os, " -a Write ASCII output if possible.\n"); + fprintf(os, " -b Fast bulk output for large serialisations.\n"); + fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); + fprintf(os, " -e Eat input one character at a time.\n"); + fprintf(os, " -f Keep full URIs in input (don't qualify).\n"); + fprintf(os, " -h Display this help and exit.\n"); + fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); + fprintf(os, " -l Lax (non-strict) parsing.\n"); + fprintf(os, " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"); + fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); + fprintf(os, " -q Suppress all output except data.\n"); + fprintf(os, " -r ROOT_URI Keep relative URIs within ROOT_URI.\n"); + fprintf(os, " -s INPUT Parse INPUT as string (terminates options).\n"); + fprintf(os, " -v Display version information and exit.\n"); + return error ? 1 : 0; } static int missing_arg(const char* name, char opt) { - SERDI_ERRORF("option requires an argument -- '%c'\n", opt); - return print_usage(name, true); + SERDI_ERRORF("option requires an argument -- '%c'\n", opt); + return print_usage(name, true); } static SerdStatus quiet_error_sink(void* handle, const SerdError* e) { - (void)handle; - (void)e; - return SERD_SUCCESS; + (void)handle; + (void)e; + return SERD_SUCCESS; } static inline FILE* serd_fopen(const char* path, const char* mode) { - FILE* fd = fopen(path, mode); - if (!fd) { - SERDI_ERRORF("failed to open file %s (%s)\n", path, strerror(errno)); - return NULL; - } + FILE* fd = fopen(path, mode); + if (!fd) { + SERDI_ERRORF("failed to open file %s (%s)\n", path, strerror(errno)); + return NULL; + } #if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO) - posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL|POSIX_FADV_NOREUSE); + posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL | POSIX_FADV_NOREUSE); #endif - return fd; + return fd; } static SerdStyle @@ -155,219 +155,221 @@ choose_style(const SerdSyntax input_syntax, const bool bulk_write, const bool full_uris) { - unsigned output_style = 0u; - if (output_syntax == SERD_NTRIPLES || ascii) { - output_style |= SERD_STYLE_ASCII; - } else if (output_syntax == SERD_TURTLE) { - output_style |= SERD_STYLE_ABBREVIATED; - if (!full_uris) { - output_style |= SERD_STYLE_CURIED; - } - } - - if ((input_syntax == SERD_TURTLE || input_syntax == SERD_TRIG) || - (output_style & SERD_STYLE_CURIED)) { - // Base URI may change and/or we're abbreviating URIs, so must resolve - output_style |= SERD_STYLE_RESOLVED; - } - - if (bulk_write) { - output_style |= SERD_STYLE_BULK; - } - - return (SerdStyle)output_style; + unsigned output_style = 0u; + if (output_syntax == SERD_NTRIPLES || ascii) { + output_style |= SERD_STYLE_ASCII; + } else if (output_syntax == SERD_TURTLE) { + output_style |= SERD_STYLE_ABBREVIATED; + if (!full_uris) { + output_style |= SERD_STYLE_CURIED; + } + } + + if ((input_syntax == SERD_TURTLE || input_syntax == SERD_TRIG) || + (output_style & SERD_STYLE_CURIED)) { + // Base URI may change and/or we're abbreviating URIs, so must resolve + output_style |= SERD_STYLE_RESOLVED; + } + + if (bulk_write) { + output_style |= SERD_STYLE_BULK; + } + + return (SerdStyle)output_style; } int main(int argc, char** argv) { - if (argc < 2) { - return print_usage(argv[0], true); - } - - FILE* in_fd = NULL; - SerdSyntax input_syntax = (SerdSyntax)0; - SerdSyntax output_syntax = (SerdSyntax)0; - bool from_file = true; - bool ascii = false; - bool bulk_read = true; - bool bulk_write = false; - bool full_uris = false; - bool lax = false; - bool quiet = false; - const uint8_t* in_name = NULL; - const uint8_t* add_prefix = NULL; - const uint8_t* chop_prefix = NULL; - const uint8_t* root_uri = NULL; - int a = 1; - for (; a < argc && argv[a][0] == '-'; ++a) { - if (argv[a][1] == '\0') { - in_name = (const uint8_t*)"(stdin)"; - in_fd = stdin; - break; - } - - if (argv[a][1] == 'a') { - ascii = true; - } else if (argv[a][1] == 'b') { - bulk_write = true; - } else if (argv[a][1] == 'e') { - bulk_read = false; - } else if (argv[a][1] == 'f') { - full_uris = true; - } else if (argv[a][1] == 'h') { - return print_usage(argv[0], false); - } else if (argv[a][1] == 'l') { - lax = true; - } else if (argv[a][1] == 'q') { - quiet = true; - } else if (argv[a][1] == 'v') { - return print_version(); - } else if (argv[a][1] == 's') { - in_name = (const uint8_t*)"(string)"; - from_file = false; - ++a; - break; - } else if (argv[a][1] == 'i') { - if (++a == argc) { - return missing_arg(argv[0], 'i'); - } - - if (!(input_syntax = get_syntax(argv[a]))) { - return print_usage(argv[0], true); - } - } else if (argv[a][1] == 'o') { - if (++a == argc) { - return missing_arg(argv[0], 'o'); - } - - if (!(output_syntax = get_syntax(argv[a]))) { - return print_usage(argv[0], true); - } - } else if (argv[a][1] == 'p') { - if (++a == argc) { - return missing_arg(argv[0], 'p'); - } - - add_prefix = (const uint8_t*)argv[a]; - } else if (argv[a][1] == 'c') { - if (++a == argc) { - return missing_arg(argv[0], 'c'); - } - - chop_prefix = (const uint8_t*)argv[a]; - } else if (argv[a][1] == 'r') { - if (++a == argc) { - return missing_arg(argv[0], 'r'); - } - - root_uri = (const uint8_t*)argv[a]; - } else { - SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); - return print_usage(argv[0], true); - } - } - - if (a == argc) { - SERDI_ERROR("missing input\n"); - return 1; - } + if (argc < 2) { + return print_usage(argv[0], true); + } + + FILE* in_fd = NULL; + SerdSyntax input_syntax = (SerdSyntax)0; + SerdSyntax output_syntax = (SerdSyntax)0; + bool from_file = true; + bool ascii = false; + bool bulk_read = true; + bool bulk_write = false; + bool full_uris = false; + bool lax = false; + bool quiet = false; + const uint8_t* in_name = NULL; + const uint8_t* add_prefix = NULL; + const uint8_t* chop_prefix = NULL; + const uint8_t* root_uri = NULL; + int a = 1; + for (; a < argc && argv[a][0] == '-'; ++a) { + if (argv[a][1] == '\0') { + in_name = (const uint8_t*)"(stdin)"; + in_fd = stdin; + break; + } + + if (argv[a][1] == 'a') { + ascii = true; + } else if (argv[a][1] == 'b') { + bulk_write = true; + } else if (argv[a][1] == 'e') { + bulk_read = false; + } else if (argv[a][1] == 'f') { + full_uris = true; + } else if (argv[a][1] == 'h') { + return print_usage(argv[0], false); + } else if (argv[a][1] == 'l') { + lax = true; + } else if (argv[a][1] == 'q') { + quiet = true; + } else if (argv[a][1] == 'v') { + return print_version(); + } else if (argv[a][1] == 's') { + in_name = (const uint8_t*)"(string)"; + from_file = false; + ++a; + break; + } else if (argv[a][1] == 'i') { + if (++a == argc) { + return missing_arg(argv[0], 'i'); + } + + if (!(input_syntax = get_syntax(argv[a]))) { + return print_usage(argv[0], true); + } + } else if (argv[a][1] == 'o') { + if (++a == argc) { + return missing_arg(argv[0], 'o'); + } + + if (!(output_syntax = get_syntax(argv[a]))) { + return print_usage(argv[0], true); + } + } else if (argv[a][1] == 'p') { + if (++a == argc) { + return missing_arg(argv[0], 'p'); + } + + add_prefix = (const uint8_t*)argv[a]; + } else if (argv[a][1] == 'c') { + if (++a == argc) { + return missing_arg(argv[0], 'c'); + } + + chop_prefix = (const uint8_t*)argv[a]; + } else if (argv[a][1] == 'r') { + if (++a == argc) { + return missing_arg(argv[0], 'r'); + } + + root_uri = (const uint8_t*)argv[a]; + } else { + SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); + return print_usage(argv[0], true); + } + } + + if (a == argc) { + SERDI_ERROR("missing input\n"); + return 1; + } #ifdef _WIN32 - _setmode(_fileno(stdin), _O_BINARY); - _setmode(_fileno(stdout), _O_BINARY); + _setmode(_fileno(stdin), _O_BINARY); + _setmode(_fileno(stdout), _O_BINARY); #endif - uint8_t* input_path = NULL; - const uint8_t* input = (const uint8_t*)argv[a++]; - if (from_file) { - in_name = in_name ? in_name : input; - if (!in_fd) { - if (!strncmp((const char*)input, "file:", 5)) { - input_path = serd_file_uri_parse(input, NULL); - input = input_path; - } - if (!input || !(in_fd = serd_fopen((const char*)input, "rb"))) { - return 1; - } - } - } - - if (!input_syntax && !(input_syntax = guess_syntax((const char*)in_name))) { - input_syntax = SERD_TRIG; - } - - if (!output_syntax) { - output_syntax = ( - (input_syntax == SERD_TURTLE || input_syntax == SERD_NTRIPLES) - ? SERD_NTRIPLES - : SERD_NQUADS); - } - - const SerdStyle output_style = - choose_style(input_syntax, output_syntax, ascii, bulk_write, full_uris); - - SerdURI base_uri = SERD_URI_NULL; - SerdNode base = SERD_NODE_NULL; - if (a < argc) { // Base URI given on command line - base = serd_node_new_uri_from_string( - (const uint8_t*)argv[a], NULL, &base_uri); - } else if (from_file && in_fd != stdin) { // Use input file URI - base = serd_node_new_file_uri(input, NULL, &base_uri, true); - } - - FILE* const out_fd = stdout; - SerdEnv* const env = serd_env_new(&base); - - SerdWriter* const writer = serd_writer_new( - output_syntax, output_style, env, &base_uri, serd_file_sink, out_fd); - - SerdReader* const reader = serd_reader_new( - input_syntax, writer, NULL, - (SerdBaseSink)serd_writer_set_base_uri, - (SerdPrefixSink)serd_writer_set_prefix, - (SerdStatementSink)serd_writer_write_statement, - (SerdEndSink)serd_writer_end_anon); - - serd_reader_set_strict(reader, !lax); - if (quiet) { - serd_reader_set_error_sink(reader, quiet_error_sink, NULL); - serd_writer_set_error_sink(writer, quiet_error_sink, NULL); - } - - SerdNode root = serd_node_from_string(SERD_URI, root_uri); - serd_writer_set_root_uri(writer, &root); - serd_writer_chop_blank_prefix(writer, chop_prefix); - serd_reader_add_blank_prefix(reader, add_prefix); - - SerdStatus st = SERD_SUCCESS; - if (!from_file) { - st = serd_reader_read_string(reader, input); - } else if (bulk_read) { - st = serd_reader_read_file_handle(reader, in_fd, in_name); - } else { - st = serd_reader_start_stream(reader, in_fd, in_name, false); - while (!st) { - st = serd_reader_read_chunk(reader); - } - serd_reader_end_stream(reader); - } - - serd_reader_free(reader); - serd_writer_finish(writer); - serd_writer_free(writer); - serd_env_free(env); - serd_node_free(&base); - free(input_path); - - if (from_file) { - fclose(in_fd); - } - - if (fclose(out_fd)) { - perror("serdi: write error"); - st = SERD_ERR_UNKNOWN; - } - - return (st > SERD_FAILURE) ? 1 : 0; + uint8_t* input_path = NULL; + const uint8_t* input = (const uint8_t*)argv[a++]; + if (from_file) { + in_name = in_name ? in_name : input; + if (!in_fd) { + if (!strncmp((const char*)input, "file:", 5)) { + input_path = serd_file_uri_parse(input, NULL); + input = input_path; + } + if (!input || !(in_fd = serd_fopen((const char*)input, "rb"))) { + return 1; + } + } + } + + if (!input_syntax && !(input_syntax = guess_syntax((const char*)in_name))) { + input_syntax = SERD_TRIG; + } + + if (!output_syntax) { + output_syntax = + ((input_syntax == SERD_TURTLE || input_syntax == SERD_NTRIPLES) + ? SERD_NTRIPLES + : SERD_NQUADS); + } + + const SerdStyle output_style = + choose_style(input_syntax, output_syntax, ascii, bulk_write, full_uris); + + SerdURI base_uri = SERD_URI_NULL; + SerdNode base = SERD_NODE_NULL; + if (a < argc) { // Base URI given on command line + base = + serd_node_new_uri_from_string((const uint8_t*)argv[a], NULL, &base_uri); + } else if (from_file && in_fd != stdin) { // Use input file URI + base = serd_node_new_file_uri(input, NULL, &base_uri, true); + } + + FILE* const out_fd = stdout; + SerdEnv* const env = serd_env_new(&base); + + SerdWriter* const writer = serd_writer_new( + output_syntax, output_style, env, &base_uri, serd_file_sink, out_fd); + + SerdReader* const reader = + serd_reader_new(input_syntax, + writer, + NULL, + (SerdBaseSink)serd_writer_set_base_uri, + (SerdPrefixSink)serd_writer_set_prefix, + (SerdStatementSink)serd_writer_write_statement, + (SerdEndSink)serd_writer_end_anon); + + serd_reader_set_strict(reader, !lax); + if (quiet) { + serd_reader_set_error_sink(reader, quiet_error_sink, NULL); + serd_writer_set_error_sink(writer, quiet_error_sink, NULL); + } + + SerdNode root = serd_node_from_string(SERD_URI, root_uri); + serd_writer_set_root_uri(writer, &root); + serd_writer_chop_blank_prefix(writer, chop_prefix); + serd_reader_add_blank_prefix(reader, add_prefix); + + SerdStatus st = SERD_SUCCESS; + if (!from_file) { + st = serd_reader_read_string(reader, input); + } else if (bulk_read) { + st = serd_reader_read_file_handle(reader, in_fd, in_name); + } else { + st = serd_reader_start_stream(reader, in_fd, in_name, false); + while (!st) { + st = serd_reader_read_chunk(reader); + } + serd_reader_end_stream(reader); + } + + serd_reader_free(reader); + serd_writer_finish(writer); + serd_writer_free(writer); + serd_env_free(env); + serd_node_free(&base); + free(input_path); + + if (from_file) { + fclose(in_fd); + } + + if (fclose(out_fd)) { + perror("serdi: write error"); + st = SERD_ERR_UNKNOWN; + } + + return (st > SERD_FAILURE) ? 1 : 0; } |