/*
  Copyright 2011-2022 David Robillard

  Permission to use, copy, modify, and/or distribute this software for any
  purpose with or without fee is hereby granted, provided that the above
  copyright notice and this permission notice appear in all copies.

  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#include "console.h"

#include "serd/serd.h"

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NS_OWL "http://www.w3.org/2002/07/owl#"
#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
#define NS_RDFS "http://www.w3.org/2000/01/rdf-schema#"

// Defaults used by main() when -b and -k are not given
#define DEFAULT_BLOCK_SIZE 4096u
#define DEFAULT_STACK_SIZE 4194304u

/* Application (after parsing command-line arguments) */

// Print a fixed error message to stderr, prefixed with the tool name
#define SERDI_ERROR(msg) fprintf(stderr, "serd-validate: " msg)

// Print a formatted error message to stderr, prefixed with the tool name
#define SERDI_ERRORF(fmt, ...) \
  fprintf(stderr, "serd-validate: " fmt, __VA_ARGS__)

/* Settings gathered from the command line by main() and parse_option(). */
typedef struct {
  const char*       base_uri_string; // Value of -B, or NULL
  const char*       out_filename;    // Value of -w, or NULL
  char* const*      inputs;          // First input argument in argv
  intptr_t          n_inputs;        // Number of input arguments
  SerdSyntaxOptions input_options;   // Set by -I
  size_t            block_size;      // Value of -b
  size_t            stack_size;      // Value of -k
  bool              verbose;         // Set by -v
  bool              quiet;           // Set by -q
} Options;

/*
  Read a whole document from `input` into `sink`, then close `input`.

  Returns SERD_UNKNOWN_ERROR if `input` has no stream or a reader could not
  be created, otherwise the status of starting or reading the document.
*/
static SerdStatus
consume_source(SerdWorld* const      world,
               const Options         opts,
               SerdSyntax            syntax,
               SerdEnv* const        env,
               const SerdSink* const sink,
               SerdInputStream       input,
               const SerdNode* const name)
{
  if (!input.stream) {
    return SERD_UNKNOWN_ERROR;
  }

  SerdStatus        st     = SERD_UNKNOWN_ERROR;
  SerdReader* const reader = serd_reader_new(
    world, syntax, opts.input_options.flags, env, sink, opts.stack_size);

  if (!reader) {
    SERDI_ERROR("failed to create reader\n");
  } else {
    st = serd_reader_start(reader, &input, name, opts.block_size);
    if (!st) {
      st = serd_reader_read_document(reader);
    }

    serd_reader_free(reader);
  }

  // The input was opened by the caller, but is always closed here
  serd_close_input(&input);
  return st;
}

/*
  Read the file at `filename` (or "-", see print_usage()) into `sink`.

  If no base URI was given on the command line and `filename` is not "-",
  the base URI of `env` is first set from the file's path.
*/
static SerdStatus
read_file(SerdWorld* const      world,
          const Options         opts,
          SerdEnv* const        env,
          const SerdSink* const sink,
          const char* const     filename)
{
  if (!opts.base_uri_string && strcmp(filename, "-") != 0) {
    const SerdStatus st = serd_set_base_uri_from_path(env, filename);
    if (st) {
      SERDI_ERRORF("failed to determine base URI from path %s\n", filename);
      return st;
    }
  }

  const SerdNode* const name =
    serd_nodes_string(serd_world_nodes(world), SERD_STRING(filename));

  // Falls back to TriG when the syntax is neither given nor guessable
  const SerdSyntax syntax =
    serd_choose_syntax(world, opts.input_options, filename, SERD_TRIG);

  return consume_source(
    world, opts, syntax, env, sink, serd_open_tool_input(filename), name);
}

/*
  Create the environment used for reading all inputs.

  With an explicit -B the given base URI is used.  Otherwise, if there is
  exactly one input, the base URI is the file URI of its canonical path.  In
  any other case (or if that fails) the environment starts with an empty
  base URI.
*/
static SerdEnv*
build_env(SerdWorld* const world, Options opts)
{
  if (opts.base_uri_string || opts.n_inputs != 1) {
    return serd_env_new(world,
                        opts.base_uri_string
                          ? SERD_STRING(opts.base_uri_string)
                          : SERD_EMPTY_STRING());
  }

  // Choose base URI from the single input path
  const char* const input      = opts.inputs[0];
  char* const       input_path = serd_canonical_path(NULL, input);
  SerdNode*         base       = NULL;
  if (input_path) {
    base =
      serd_new_file_uri(NULL, SERD_STRING(input_path), SERD_EMPTY_STRING());
  }

  if (!base) {
    SERDI_ERRORF("unable to determine base URI from path %s\n", input);
  }

  SerdEnv* const env = serd_env_new(
    world, base ? serd_node_string_view(base) : SERD_EMPTY_STRING());

  serd_free(NULL, input_path);
  serd_node_free(NULL, base);
  return env;
}

/*
  Create the model that holds both the data and the schemas.

  `with_graphs` selects the default order and whether SERD_STORE_GRAPHS is
  set.  Returns NULL if the model could not be created.
*/
static SerdModel*
build_model(SerdWorld* const world, const Options opts, bool with_graphs)
{
  (void)opts; // FIXME

  SerdModel* const model = serd_model_new(
    world,
    with_graphs ? SERD_ORDER_GSPO : SERD_ORDER_SPO,
    (with_graphs * (unsigned)SERD_STORE_GRAPHS) | SERD_STORE_CARETS);

  if (!model) {
    return NULL;
  }

  // FIXME: graph indices are currently added regardless of with_graphs
  serd_model_add_index(model, SERD_ORDER_GSPO);
  serd_model_add_index(model, SERD_ORDER_POS);
  serd_model_add_index(model, SERD_ORDER_GPOS);
  serd_model_add_index(model, SERD_ORDER_PSO);
  serd_model_add_index(model, SERD_ORDER_GPSO);
  serd_model_add_index(model, SERD_ORDER_OPS);
  serd_model_add_index(model, SERD_ORDER_GOPS);

  return model;
}

/*
  Return true if the input may contain graphs.

  An explicitly given input syntax decides alone, otherwise the syntax of
  every input is guessed from its name and any one with graphs suffices.
*/
static bool
input_has_graphs(const Options opts)
{
  if (opts.input_options.syntax) {
    return serd_syntax_has_graphs(opts.input_options.syntax);
  }

  for (intptr_t i = 0; i < opts.n_inputs; ++i) {
    if (serd_syntax_has_graphs(serd_guess_syntax(opts.inputs[i]))) {
      return true;
    }
  }

  return false;
}

/*
  Read every input in order into `sink`, stopping at the first input that
  returns a non-zero status, and return the status of the last one read.
*/
static SerdStatus
read_inputs(SerdWorld*            world,
            const Options         opts,
            SerdEnv*              env,
            const SerdSink* const sink)
{
  SerdStatus st = SERD_SUCCESS;

  for (intptr_t i = 0; !st && i < opts.n_inputs; ++i) {
    st = read_file(world, opts, env, sink, opts.inputs[i]);
  }

  return st;
}

/*
  Return a model where every object is the file URI of a schema to load.

  The statements in the result are like `?ontology rdfs:seeAlso ?resource`,
  where `?ontology` is the URI of the owl:Ontology instance and `?resource`
  is a file URI.  Only one rdfs:seeAlso statement is considered for each
  ontology, and resources that are not "file://" URIs are ignored.

  Returns NULL if the result model could not be created.  The caller must
  free the returned model.
*/
static SerdModel*
get_schemas_model(const Options    opts,
                  SerdWorld* const world,
                  SerdModel* const model)
{
  static const SerdStringView s_rdf_type     = SERD_STRING(NS_RDF "type");
  static const SerdStringView s_owl_Ontology = SERD_STRING(NS_OWL "Ontology");
  static const SerdStringView s_rdfs_seeAlso = SERD_STRING(NS_RDFS "seeAlso");

  SerdNodes* const nodes = serd_world_nodes(world);

  SerdModel* const schemas_model =
    serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_CARETS);

  if (!schemas_model) {
    return NULL;
  }

  const SerdNode* const rdf_type     = serd_nodes_uri(nodes, s_rdf_type);
  const SerdNode* const owl_Ontology = serd_nodes_uri(nodes, s_owl_Ontology);
  const SerdNode* const rdfs_seeAlso = serd_nodes_uri(nodes, s_rdfs_seeAlso);

  // Visit every `?ontology rdf:type owl:Ontology` statement
  SerdCursor* const i =
    serd_model_find(model, NULL, rdf_type, owl_Ontology, NULL);

  for (; !serd_cursor_is_end(i); serd_cursor_advance(i)) {
    const SerdStatement* const typing   = serd_cursor_get(i);
    const SerdNode* const      ontology = serd_statement_subject(typing);

    const SerdStatement* const link =
      serd_model_get_statement(model, ontology, rdfs_seeAlso, NULL, NULL);

    const SerdNode* const resource = link ? serd_statement_object(link) : NULL;
    if (!resource || serd_node_type(resource) != SERD_URI) {
      continue;
    }

    const char* const resource_uri = serd_node_string(resource);
    if (strncmp(resource_uri, "file://", strlen("file://")) != 0) {
      continue;
    }

    // Only announce resources that will actually be loaded
    if (opts.verbose) {
      serd_logf_at(world,
                   SERD_LOG_LEVEL_NOTICE,
                   serd_statement_caret(link),
                   "Loading %s",
                   resource_uri);
    }

    serd_model_add(schemas_model, ontology, rdfs_seeAlso, resource, NULL);
  }

  serd_cursor_free(i);
  return schemas_model;
}

/* Return true if `flag` is an option that is followed by a value argument. */
static bool
flag_takes_value(const char flag)
{
  // Mirrors the cases in parse_option() that fetch an argument
  return flag && strchr("BIWXbksw", flag);
}

/*
  Apply the -W, -X, and -s options in the option part of `argv`.

  parse_option() has already checked these arguments but skipped their
  values, so they are scanned a second time here.  Only arguments before
  index `n_option_args` (the first input) are considered.  A flag with a
  value is the last character of its argument (like "-W" or "-vW") and its
  value is the following argument, which is never itself taken as a flag.

  If neither -W nor -X is given, all checks are enabled.  Returns the status
  of the first -s schema that failed to load with more than SERD_FAILURE,
  otherwise the status of the last schema read.
*/
static SerdStatus
apply_validation_args(SerdWorld* const      world,
                      const Options         opts,
                      SerdEnv* const        env,
                      const SerdSink* const schema_sink,
                      SerdValidator* const  validator,
                      const int             n_option_args,
                      char** const          argv)
{
  SerdStatus st           = SERD_SUCCESS;
  bool       checks_given = false;

  for (int i = 1; st <= SERD_FAILURE && i < n_option_args; ++i) {
    const char* const arg = argv[i];
    const size_t      len = strlen(arg);
    if (arg[0] != '-' || len < 2u) {
      continue;
    }

    const char flag = arg[len - 1u];
    if (!flag_takes_value(flag) || i + 1 >= n_option_args) {
      continue;
    }

    const char* const value = argv[++i];
    switch (flag) {
    case 'W':
      serd_validator_enable_checks(validator, value);
      checks_given = true;
      break;
    case 'X':
      serd_validator_disable_checks(validator, value);
      checks_given = true;
      break;
    case 's':
      st = read_file(world, opts, env, schema_sink, value);
      if (st > SERD_FAILURE) {
        SERDI_ERRORF("failed to load schema %s\n", value);
      }
      break;
    default:
      // Value of an option that was fully handled by parse_option()
      break;
    }
  }

  if (!checks_given) {
    serd_validator_enable_checks(validator, "all");
  }

  return st;
}

/*
  Load the schema files that ontologies in `model` link to with rdfs:seeAlso.

  This is best-effort: a schema that fails to load is reported and skipped.
*/
static void
load_linked_schemas(SerdWorld* const      world,
                    const Options         opts,
                    SerdEnv* const        env,
                    const SerdSink* const schema_sink,
                    SerdModel* const      model)
{
  /* The links are first collected into a separate model, presumably so that
     `model` is not being iterated over while the schemas are read. */
  SerdModel* const schemas_model = get_schemas_model(opts, world, model);
  if (!schemas_model) {
    return;
  }

  SerdCursor* const i = serd_model_begin(schemas_model);
  for (; !serd_cursor_is_end(i); serd_cursor_advance(i)) {
    const SerdStatement* const link         = serd_cursor_get(i);
    const SerdNode* const      resource     = serd_statement_object(link);
    const char* const          resource_uri = serd_node_string(resource);

    char* const path = serd_parse_file_uri(NULL, resource_uri, NULL);
    if (path) {
      if (read_file(world, opts, env, schema_sink, path) > SERD_FAILURE) {
        SERDI_ERRORF("failed to load linked schema %s\n", path);
      }

      serd_free(NULL, path);
    }
  }

  serd_cursor_free(i);
  serd_model_free(schemas_model);
}

/*
  Build the data pipeline in `world`, read all inputs, load schemas, and
  validate the data graph.

  Everything created here is freed before returning, on every path.
*/
static SerdStatus
run_in_world(SerdWorld* const world,
             const Options    opts,
             const bool       with_graphs,
             const int        argc,
             char** const     argv)
{
  SerdNodes* const nodes = serd_world_nodes(world);

  // Data and schemas share one model, but are kept in separate graphs
  const SerdNode* const schema_graph =
    serd_nodes_uri(nodes, SERD_STRING("http://drobilla.net/sw/serd#schemas"));

  const SerdNode* const data_graph =
    serd_nodes_uri(nodes, SERD_STRING("http://drobilla.net/sw/serd#data"));

  SerdEnv* const   env   = build_env(world, opts);
  SerdModel* const model = build_model(world, opts, with_graphs);

  SerdSink* const schema_sink =
    model ? serd_inserter_new(model, schema_graph) : NULL;

  SerdSink* const data_sink =
    model ? serd_inserter_new(model, data_graph) : NULL;

  SerdStatus st = SERD_BAD_ARG; // FIXME: ?
  if (!env || !schema_sink || !data_sink) {
    SERDI_ERROR("failed to construct data pipeline, aborting\n");
  } else {
    if (opts.quiet) {
      serd_set_log_func(world, serd_quiet_log_func, NULL);
    }

    st = read_inputs(world, opts, env, data_sink);
    if (st <= SERD_FAILURE) { // FIXME: ?
      SerdValidator* const validator = serd_validator_new(world);
      if (!validator) {
        SERDI_ERROR("failed to create validator, aborting\n");
        st = SERD_UNKNOWN_ERROR;
      } else {
        // Inputs follow the options, so this is the index of the first input
        const int n_option_args = argc - (int)opts.n_inputs;

        st = apply_validation_args(
          world, opts, env, schema_sink, validator, n_option_args, argv);

        if (st <= SERD_FAILURE) {
          load_linked_schemas(world, opts, env, schema_sink, model);
          st = serd_validate(validator, model, data_graph, env);
        }

        serd_validator_free(validator);
      }
    }
  }

  if (data_sink) {
    serd_sink_free(data_sink);
  }

  if (schema_sink) {
    serd_sink_free(schema_sink);
  }

  if (model) {
    serd_model_free(model);
  }

  if (env) {
    serd_env_free(env);
  }

  return st;
}

/*
  Run the tool with fully parsed options.

  `argc` and `argv` are the original program arguments, which are needed
  because the -W, -X, and -s options are applied after the inputs are read.
  Returns SERD_UNKNOWN_ERROR if the output can not be opened or closed, or
  the world can not be created, otherwise the status of the run.
*/
static SerdStatus
run(Options opts, int argc, char** argv)
{
  const bool with_graphs = input_has_graphs(opts);

  SerdOutputStream out = serd_open_tool_output(opts.out_filename);
  if (!out.stream) {
    perror("error opening output file");
    return SERD_UNKNOWN_ERROR;
  }

  SerdStatus       st    = SERD_UNKNOWN_ERROR;
  SerdWorld* const world = serd_world_new(NULL);
  if (!world) {
    SERDI_ERROR("failed to create world, aborting\n");
  } else {
    st = run_in_world(world, opts, with_graphs, argc, argv);
    serd_world_free(world);
  }

  if (serd_close_output(&out)) {
    perror("serd-validate: write error");
    st = SERD_UNKNOWN_ERROR;
  }

  return st;
}

/* Command-line interface (before setting up serd) */

/*
  Print the usage message for the program called `name`.

  If `error` is true, the message is written to stderr after a blank line
  and SERD_BAD_ARG is returned.  Otherwise it is written to stdout and
  SERD_SUCCESS is returned.
*/
static SerdStatus
print_usage(const char* const name, const bool error)
{
  static const char* const description =
    "Validate RDF data against RDFS and OWL schemas.\n"
    "INPUT can be a local filename, or \"-\" to read from standard input.\n\n"
    "  -B BASE_URI  Base URI or path for resolving relative references.\n"
    "  -I SYNTAX    Input syntax (turtle/ntriples/trig/nquads),\n"
    "               or option (lax/variables/relative/global/generated).\n"
    "  -V           Display version information and exit.\n"
    "  -W CHECKS    Enable checks matching regex CHECKS (or \"all\").\n"
    "  -X CHECKS    Exclude checks matching regex CHECKS (or \"all\").\n"
    "  -b BYTES     I/O block size.\n"
    "  -h           Display this help and exit.\n"
    "  -k BYTES     Parser stack size.\n"
    "  -q           Suppress log messages.\n"
    "  -s SCHEMA    Schema input file.\n"
    "  -v           Print verbose messages about loaded resources.\n"
    "  -w FILENAME  Write output to FILENAME.\n";

  FILE* const os = error ? stderr : stdout;
  fprintf(os, "%s", error ? "\n" : "");
  fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
  fprintf(os, "%s", description);
  return error ? SERD_BAD_ARG : SERD_SUCCESS;
}

/*
  Parse the option flag at the current position of `iter` into `opts`.

  Returns the status of the helper that handled the flag.  -h prints the
  usage message and returns SERD_FAILURE, which makes main() exit with
  status zero.  An unknown flag is reported together with the usage
  message, and SERD_BAD_ARG is returned.
*/
static SerdStatus
parse_option(OptionIter* iter, Options* const opts)
{
#define ARG_ERRORF(fmt, ...) \
  fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__)

  const char  opt      = iter->argv[iter->a][iter->f];
  const char* argument = NULL; // Receives values that are applied in run()

  switch (opt) {
  case 'B':
    return serd_get_argument(iter, &opts->base_uri_string);

  case 'I':
    return serd_parse_input_argument(iter, &opts->input_options);

  case 'V':
    return serd_print_version("serd-validate");

  case 'W':
  case 'X':
    // Just enable validation and skip the pattern, checks are parsed later
    return serd_get_argument(iter, &argument);

  case 'b':
    return serd_get_size_argument(iter, &opts->block_size);

  case 'h':
    print_usage(iter->argv[0], false);
    return SERD_FAILURE;

  case 'k':
    return serd_get_size_argument(iter, &opts->stack_size);

  case 'q':
    opts->quiet = true;
    return serd_option_iter_advance(iter);

  case 's':
    // Schema input, ignore here since these are loaded later
    return serd_get_argument(iter, &argument);

  case 'v':
    opts->verbose = true;
    return serd_option_iter_advance(iter);

  case 'w':
    return serd_get_argument(iter, &opts->out_filename);

  default:
    break;
  }

  ARG_ERRORF("invalid option -- '%c'\n", opt);
  return print_usage(iter->argv[0], true);

#undef ARG_ERRORF
}

/*
  Parse the command line, then read and validate all inputs.

  Exits with status zero if the final status is at most SERD_FAILURE.
*/
int
main(int argc, char** argv)
{
  // Every member that is not named here starts as NULL, zero, or false
  Options opts = {.input_options = {SERD_SYNTAX_EMPTY, 0u, false},
                  .block_size    = DEFAULT_BLOCK_SIZE,
                  .stack_size    = DEFAULT_STACK_SIZE};

  // Parse all command line options (which must precede inputs)
  OptionIter iter = {argv, argc, 1, 1};
  while (!serd_option_iter_is_end(iter)) {
    const SerdStatus opt_st = parse_option(&iter, &opts);
    if (opt_st) {
      return (opt_st > SERD_FAILURE);
    }
  }

  // Every argument past the last option is an input
  opts.inputs   = argv + iter.a;
  opts.n_inputs = argc - iter.a;
  if (opts.n_inputs < 1) {
    fprintf(stderr, "%s: missing input\n", argv[0]);
    print_usage(argv[0], true);
    return EXIT_FAILURE;
  }

  const SerdStatus st = run(opts, argc, argv);
  return (st <= SERD_FAILURE) ? 0 : (int)st;
}