aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2018-05-27 15:48:25 +0200
committer David Robillard <d@drobilla.net> 2022-01-28 21:57:29 -0500
commit f93a441065a611cc32874dde67e53a8295c87baf (patch)
tree 3793e2df1c365bf93fbe700c9428b54668f08c53 /tools
parent 2c5bee49b4494e172c4fa147af91bad199ed9362 (diff)
download serd-f93a441065a611cc32874dde67e53a8295c87baf.tar.gz
serd-f93a441065a611cc32874dde67e53a8295c87baf.tar.bz2
serd-f93a441065a611cc32874dde67e53a8295c87baf.zip
[WIP] Add validation
Diffstat (limited to 'tools')
-rw-r--r-- tools/meson.build 13
-rw-r--r-- tools/serd-validate.c 479
2 files changed, 486 insertions, 6 deletions
diff --git a/tools/meson.build b/tools/meson.build
index d4964784..da29c6dc 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -5,12 +5,6 @@ if get_option('static')
tool_link_args += ['-static']
endif
-tools = [
- 'filter',
- 'pipe',
- 'sort',
-]
-
serd_filter = executable('serd-filter',
['serd-filter.c', 'console.c'],
c_args: tool_c_args,
@@ -31,3 +25,10 @@ serd_sort = executable('serd-sort',
link_args: tool_link_args,
install: true,
dependencies: serd_dep)
+
+# Validation tool, built from serd-validate.c and the console.c helpers that
+# the other tools in this file also compile in
+serd_validate = executable('serd-validate',
+ ['serd-validate.c', 'console.c'],
+ c_args: tool_c_args,
+ link_args: tool_link_args,
+ install: true,
+ dependencies: serd_dep)
diff --git a/tools/serd-validate.c b/tools/serd-validate.c
new file mode 100644
index 00000000..fd203611
--- /dev/null
+++ b/tools/serd-validate.c
@@ -0,0 +1,479 @@
+/*
+ Copyright 2011-2022 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "console.h"
+
+#include "serd/serd.h"
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define NS_OWL "http://www.w3.org/2002/07/owl#"
+#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+#define NS_RDFS "http://www.w3.org/2000/01/rdf-schema#"
+
+/* Application (after parsing command-line arguments) */
+
+// Print a fixed message to stderr with a "serdi: " prefix.  `msg` must be a
+// string literal, since it is joined to the prefix by literal concatenation.
+#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg)
+// Print a printf-style message to stderr with a "serdi: " prefix.  `fmt` must
+// be a string literal, and at least one argument must follow it.
+#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__)
+
+// Settings gathered from the command line by parse_option() and main()
+typedef struct {
+ const char* base_uri_string; // Argument of -B, or NULL if not given
+ const char* out_filename; // Argument of -w, or NULL if not given
+ char* const* inputs; // First input argument (points into argv)
+ intptr_t n_inputs; // Number of arguments starting at `inputs`
+ SerdSyntaxOptions input_options; // Accumulated from -I arguments
+ size_t block_size; // Argument of -b, passed to serd_reader_start()
+ size_t stack_size; // Argument of -k, passed to serd_reader_new()
+ bool verbose; // Set by -v
+ bool quiet; // Set by -q
+} Options;
+
+/*
+  Read a whole document from `input` into `sink`, then close `input`.
+
+  A new reader is created for `syntax` using the flags from
+  `opts.input_options` and the stack size from `opts.stack_size`, and is freed
+  again before returning.  `name` is passed to the reader as the name of the
+  input.
+
+  Returns SERD_UNKNOWN_ERROR if `input` has no stream or the reader can not be
+  created, otherwise the status of starting the reader or reading the document.
+*/
+static SerdStatus
+consume_source(SerdWorld* const      world,
+               const Options         opts,
+               SerdSyntax            syntax,
+               SerdEnv* const        env,
+               const SerdSink* const sink,
+               SerdInputStream       input,
+               const SerdNode* const name)
+{
+  if (!input.stream) {
+    return SERD_UNKNOWN_ERROR;
+  }
+
+  SerdReader* const reader = serd_reader_new(
+    world, syntax, opts.input_options.flags, env, sink, opts.stack_size);
+
+  if (!reader) {
+    // The input is closed on every other path past this point, so close it
+    // here as well rather than leaking it
+    serd_close_input(&input);
+    return SERD_UNKNOWN_ERROR;
+  }
+
+  SerdStatus st = serd_reader_start(reader, &input, name, opts.block_size);
+  if (!st) {
+    st = serd_reader_read_document(reader);
+  }
+
+  serd_reader_free(reader);
+  serd_close_input(&input);
+  return st;
+}
+
+/*
+  Read the file `filename` (or standard input for "-") into `sink`.
+
+  When no base URI was given in `opts` and the input is not "-", the base URI
+  of `env` is first set from the path of the file.  The syntax is chosen from
+  the input options and filename, with TriG as the fallback.
+*/
+static SerdStatus
+read_file(SerdWorld* const      world,
+          const Options         opts,
+          SerdEnv* const        env,
+          const SerdSink* const sink,
+          const char* const     filename)
+{
+  const bool is_stdin = !strcmp(filename, "-");
+
+  if (!opts.base_uri_string && !is_stdin) {
+    const SerdStatus base_st = serd_set_base_uri_from_path(env, filename);
+    if (base_st) {
+      SERDI_ERRORF("failed to determine base URI from path %s\n", filename);
+      return base_st;
+    }
+  }
+
+  // The filename doubles as the document name given to the reader
+  SerdNodes* const      nodes = serd_world_nodes(world);
+  const SerdNode* const name  = serd_nodes_string(nodes, SERD_STRING(filename));
+
+  const SerdSyntax syntax =
+    serd_choose_syntax(world, opts.input_options, filename, SERD_TRIG);
+
+  return consume_source(
+    world, opts, syntax, env, sink, serd_open_tool_input(filename), name);
+}
+
+/*
+  Create the environment used for reading all inputs.
+
+  If a base URI was given in `opts` it is used as-is.  Otherwise, when there is
+  exactly one input, the base URI is the file URI of its canonical path (an
+  error is printed and an empty base URI is used if that can not be
+  determined).  In every other case the base URI is empty.
+*/
+static SerdEnv*
+build_env(SerdWorld* const world, Options opts)
+{
+  if (opts.base_uri_string || opts.n_inputs != 1) {
+    return serd_env_new(world,
+                        opts.base_uri_string
+                          ? SERD_STRING(opts.base_uri_string)
+                          : SERD_EMPTY_STRING());
+  }
+
+  // Single input and no explicit base: derive the base URI from its path
+  const char* const only_input = opts.inputs[0];
+  char* const       canonical  = serd_canonical_path(NULL, only_input);
+
+  SerdNode* file_uri = NULL;
+  if (canonical) {
+    file_uri =
+      serd_new_file_uri(NULL, SERD_STRING(canonical), SERD_EMPTY_STRING());
+  }
+
+  if (!file_uri) {
+    SERDI_ERRORF("unable to determine base URI from path %s\n", only_input);
+  }
+
+  SerdEnv* const env = serd_env_new(
+    world, file_uri ? serd_node_string_view(file_uri) : SERD_EMPTY_STRING());
+
+  serd_free(NULL, canonical);
+  serd_node_free(NULL, file_uri);
+  return env;
+}
+
+/*
+  Create the model that both data and schemas are loaded into.
+
+  The default order and the SERD_STORE_GRAPHS flag depend on `with_graphs` as
+  passed by the caller, and carets are always stored.  After the model is
+  created, `with_graphs` is currently forced to true (see FIXME below), so
+  every index listed here is added regardless of the argument.
+*/
+static SerdModel*
+build_model(SerdWorld* const world, const Options opts, bool with_graphs)
+{
+ (void)opts; // FIXME
+
+ // `with_graphs * flag` yields the flag when true and 0 when false
+ SerdModel* const model = serd_model_new(
+ world,
+ with_graphs ? SERD_ORDER_GSPO : SERD_ORDER_SPO,
+ (with_graphs * (unsigned)SERD_STORE_GRAPHS) | SERD_STORE_CARETS);
+
+ // From here on the graph indices are always added, whatever was passed in
+ with_graphs = true; // FIXME
+
+ if (with_graphs) {
+ serd_model_add_index(model, SERD_ORDER_GSPO);
+ }
+
+ // Note that the GPOS and GPSO indices are not guarded by with_graphs
+ serd_model_add_index(model, SERD_ORDER_POS);
+ serd_model_add_index(model, SERD_ORDER_GPOS);
+
+ serd_model_add_index(model, SERD_ORDER_PSO);
+ serd_model_add_index(model, SERD_ORDER_GPSO);
+
+ serd_model_add_index(model, SERD_ORDER_OPS);
+ if (with_graphs) {
+ serd_model_add_index(model, SERD_ORDER_GOPS);
+ }
+
+ return model;
+}
+
+/*
+  Return whether any input may contain named graphs.
+
+  An explicitly given input syntax decides the answer on its own.  Otherwise,
+  the syntax of each input is guessed from its name, and the result is true
+  as soon as one of them supports graphs.
+*/
+static bool
+input_has_graphs(const Options opts)
+{
+  if (!opts.input_options.syntax) {
+    bool found = false;
+    for (intptr_t n = 0; !found && n < opts.n_inputs; ++n) {
+      if (serd_syntax_has_graphs(serd_guess_syntax(opts.inputs[n]))) {
+        found = true;
+      }
+    }
+
+    return found;
+  }
+
+  return serd_syntax_has_graphs(opts.input_options.syntax);
+}
+
+/*
+  Read every input in `opts.inputs` into `sink`, in order.
+
+  Reading stops at the first input that yields a non-zero status, which is
+  then returned.  Returns SERD_SUCCESS if every input was read, or if there
+  are no inputs.
+*/
+static SerdStatus
+read_inputs(SerdWorld*            world,
+            const Options         opts,
+            SerdEnv*              env,
+            const SerdSink* const sink)
+{
+  SerdStatus st = SERD_SUCCESS;
+
+  for (intptr_t i = 0; !st && i < opts.n_inputs; ++i) {
+    st = read_file(world, opts, env, sink, opts.inputs[i]);
+  }
+
+  return st;
+}
+
+/*
+  Return a model where every object is the file URI of a schema to load.
+
+  The statements in the result are like `?ontology rdfs:seeAlso ?resource`,
+  where `?ontology` is the URI of the owl:Ontology instance and `?resource` is
+  a file URI.
+
+  For each ontology, only the rdfs:seeAlso statement returned by
+  serd_model_get_statement() is considered, and it is skipped unless its
+  object is a URI that starts with "file://".  If `opts.verbose` is set, a
+  notice is logged for each resource that is actually added to the result.
+
+  The returned model is newly created here; the caller is expected to free it
+  with serd_model_free().
+*/
+static SerdModel*
+get_schemas_model(const Options    opts,
+                  SerdWorld* const world,
+                  SerdModel* const model)
+{
+  static const SerdStringView s_rdf_type     = SERD_STRING(NS_RDF "type");
+  static const SerdStringView s_owl_Ontology = SERD_STRING(NS_OWL "Ontology");
+  static const SerdStringView s_rdfs_seeAlso = SERD_STRING(NS_RDFS "seeAlso");
+
+  SerdNodes* const nodes = serd_world_nodes(world);
+  SerdModel* const schemas_model =
+    serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_CARETS);
+
+  const SerdNode* const rdf_type     = serd_nodes_uri(nodes, s_rdf_type);
+  const SerdNode* const owl_Ontology = serd_nodes_uri(nodes, s_owl_Ontology);
+  const SerdNode* const rdfs_seeAlso = serd_nodes_uri(nodes, s_rdfs_seeAlso);
+
+  // Visit every `?ontology rdf:type owl:Ontology` statement
+  SerdCursor* const i =
+    serd_model_find(model, NULL, rdf_type, owl_Ontology, NULL);
+
+  for (; !serd_cursor_is_end(i); serd_cursor_advance(i)) {
+    const SerdStatement* const typing   = serd_cursor_get(i);
+    const SerdNode* const      ontology = serd_statement_subject(typing);
+
+    const SerdStatement* const link =
+      serd_model_get_statement(model, ontology, rdfs_seeAlso, NULL, NULL);
+    if (!link) {
+      continue;
+    }
+
+    const SerdNode* const resource = serd_statement_object(link);
+    if (!resource || serd_node_type(resource) != SERD_URI) {
+      continue;
+    }
+
+    // Only local files can be loaded, so anything else is left out
+    const char* const resource_uri = serd_node_string(resource);
+    if (strncmp(resource_uri, "file://", strlen("file://"))) {
+      continue;
+    }
+
+    // Announce only resources that really are queued for loading
+    if (opts.verbose) {
+      serd_logf_at(world,
+                   SERD_LOG_LEVEL_NOTICE,
+                   serd_statement_caret(link),
+                   "Loading %s",
+                   resource_uri);
+    }
+
+    serd_model_add(schemas_model, ontology, rdfs_seeAlso, resource, NULL);
+  }
+
+  serd_cursor_free(i);
+
+  return schemas_model;
+}
+
+/*
+  Load schemas into `schema_sink`, configure checks, and validate `model`.
+
+  The option arguments in `argv` (everything before the inputs described by
+  `opts`) are scanned for -W, -X, and -s, whose arguments enable checks,
+  disable checks, and name schema files to load, respectively.  If no checks
+  are given, all checks are enabled.  Schemas linked from ontologies in the
+  model (see get_schemas_model()) are loaded as well.
+
+  Returns the status of the validation itself.
+
+  NOTE(review): the status of each schema read is overwritten, so a schema
+  that fails to load does not affect the result -- confirm this is intended.
+  The scan also does not know which other options take an argument, so an
+  option argument that itself looks like -W, -X, or -s is misinterpreted.
+*/
+static SerdStatus
+validate_model(SerdWorld* const      world,
+               const Options         opts,
+               SerdEnv* const        env,
+               SerdModel* const      model,
+               const SerdSink* const schema_sink,
+               const SerdNode* const data_graph,
+               const int             argc,
+               char** const          argv)
+{
+  SerdStatus           st           = SERD_SUCCESS;
+  SerdValidator* const validator    = serd_validator_new(world);
+  bool                 checks_given = false;
+
+  // Inputs are the last `n_inputs` arguments, only look at what precedes them
+  const int first_input = argc - (int)opts.n_inputs;
+
+  for (int i = 1; i < first_input && i + 1 < argc; ++i) {
+    if (argv[i][0] != '-') {
+      continue;
+    }
+
+    if (argv[i][1] == 'W') {
+      serd_validator_enable_checks(validator, argv[++i]);
+      checks_given = true;
+    } else if (argv[i][1] == 'X') {
+      serd_validator_disable_checks(validator, argv[++i]);
+      checks_given = true;
+    } else if (argv[i][1] == 's') {
+      st = read_file(world, opts, env, schema_sink, argv[++i]);
+    }
+  }
+
+  if (!checks_given) {
+    serd_validator_enable_checks(validator, "all");
+  }
+
+  // Load every local schema file that an ontology in the model links to
+  SerdModel* const  schemas_model = get_schemas_model(opts, world, model);
+  SerdCursor* const cursor        = serd_model_begin(schemas_model);
+  for (; !serd_cursor_is_end(cursor); serd_cursor_advance(cursor)) {
+    const SerdStatement* const link         = serd_cursor_get(cursor);
+    const SerdNode* const      resource     = serd_statement_object(link);
+    const char* const          resource_uri = serd_node_string(resource);
+
+    char* const path = serd_parse_file_uri(NULL, resource_uri, NULL);
+    if (path) {
+      st = read_file(world, opts, env, schema_sink, path);
+      serd_free(NULL, path);
+    }
+  }
+
+  serd_cursor_free(cursor);
+  serd_model_free(schemas_model);
+
+  st = serd_validate(validator, model, data_graph, env);
+
+  serd_validator_free(validator);
+  return st;
+}
+
+/*
+  Run the tool: read all inputs, then validate them if reading succeeded.
+
+  Data is loaded into one graph of a single model and schemas into another.
+  Validation is attempted only if reading the inputs returned SERD_SUCCESS or
+  SERD_FAILURE.  Everything created here is released before returning,
+  including when the sinks can not be constructed.
+
+  Returns SERD_UNKNOWN_ERROR if the output can not be opened or closed,
+  SERD_BAD_ARG if the sinks can not be constructed, and otherwise the status
+  of reading or validation.
+*/
+static SerdStatus
+run(Options opts, int argc, char** argv)
+{
+  const bool with_graphs = input_has_graphs(opts);
+
+  SerdOutputStream out = serd_open_tool_output(opts.out_filename);
+  if (!out.stream) {
+    perror("error opening output file");
+    return SERD_UNKNOWN_ERROR;
+  }
+
+  SerdWorld* const world = serd_world_new(NULL);
+
+  const SerdNode* const schema_graph =
+    serd_nodes_uri(serd_world_nodes(world),
+                   SERD_STRING("http://drobilla.net/sw/serd#schemas"));
+
+  const SerdNode* const data_graph = serd_nodes_uri(
+    serd_world_nodes(world), SERD_STRING("http://drobilla.net/sw/serd#data"));
+
+  SerdEnv* const   env         = build_env(world, opts);
+  SerdModel* const model       = build_model(world, opts, with_graphs);
+  SerdSink* const  schema_sink = serd_inserter_new(model, schema_graph);
+  SerdSink* const  data_sink   = serd_inserter_new(model, data_graph);
+
+  SerdStatus st = SERD_SUCCESS;
+  if (!schema_sink || !data_sink) {
+    SERDI_ERROR("failed to construct data pipeline, aborting\n");
+    st = SERD_BAD_ARG; // FIXME: ?
+  } else {
+    if (opts.quiet) {
+      serd_set_log_func(world, serd_quiet_log_func, NULL);
+    }
+
+    st = read_inputs(world, opts, env, data_sink);
+
+    if (st <= SERD_FAILURE) { // FIXME: ?
+      st = validate_model(
+        world, opts, env, model, schema_sink, data_graph, argc, argv);
+    }
+  }
+
+  // Shared cleanup, so the failure path above releases everything as well
+  if (data_sink) {
+    serd_sink_free(data_sink);
+  }
+
+  if (schema_sink) {
+    serd_sink_free(schema_sink);
+  }
+
+  serd_model_free(model);
+  serd_env_free(env);
+  serd_world_free(world);
+
+  if (serd_close_output(&out)) {
+    perror("serdi: write error");
+    st = SERD_UNKNOWN_ERROR;
+  }
+
+  return st;
+}
+
+/* Command-line interface (before setting up serd) */
+
+/*
+  Print the usage message for the program called `name`.
+
+  If `error` is true, the message is written to stderr after a blank line and
+  SERD_BAD_ARG is returned.  Otherwise it is written to stdout and
+  SERD_SUCCESS is returned.
+*/
+static SerdStatus
+print_usage(const char* const name, const bool error)
+{
+  static const char* const description =
+    "Validate RDF data against RDFS and OWL schemas.\n"
+    "INPUT can be a local filename, or \"-\" to read from standard input.\n\n"
+    " -B BASE_URI Base URI or path for resolving relative references.\n"
+    " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n"
+    " or option (lax/variables/relative/global/generated).\n"
+    " -V Display version information and exit.\n"
+    " -W CHECKS Enable checks matching regex CHECKS (or \"all\").\n"
+    " -X CHECKS Exclude checks matching regex CHECKS (or \"all\").\n"
+    " -b BYTES I/O block size.\n"
+    " -h Display this help and exit.\n"
+    " -k BYTES Parser stack size.\n"
+    " -q Suppress log messages.\n"
+    " -v Print verbose messages about loaded resources.\n"
+    " -s SCHEMA Schema input file.\n";
+
+  FILE* const os = error ? stderr : stdout;
+  fprintf(os, "%s", error ? "\n" : "");
+  fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
+  fprintf(os, "%s", description);
+  return error ? SERD_BAD_ARG : SERD_SUCCESS;
+}
+
+/*
+  Handle the option character at the current position of `iter`.
+
+  Options that configure reading are stored in `opts`.  The arguments of -W,
+  -X, and -s are only skipped over here, since run() reads them from argv
+  again later.  -h prints the usage message and returns SERD_FAILURE, and an
+  unknown option prints an error followed by the usage message.
+*/
+static SerdStatus
+parse_option(OptionIter* iter, Options* const opts)
+{
+#define ARG_ERRORF(fmt, ...) \
+  fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__)
+
+  const char* const program = iter->argv[0];
+  const char        opt     = iter->argv[iter->a][iter->f];
+
+  switch (opt) {
+  // Flags without an argument
+  case 'q':
+    opts->quiet = true;
+    return serd_option_iter_advance(iter);
+
+  case 'v':
+    opts->verbose = true;
+    return serd_option_iter_advance(iter);
+
+  case 'V':
+    return serd_print_version("serd-validate");
+
+  case 'h':
+    print_usage(program, false);
+    return SERD_FAILURE;
+
+  // Options with an argument that is stored
+  case 'B':
+    return serd_get_argument(iter, &opts->base_uri_string);
+
+  case 'I':
+    return serd_parse_input_argument(iter, &opts->input_options);
+
+  case 'b':
+    return serd_get_size_argument(iter, &opts->block_size);
+
+  case 'k':
+    return serd_get_size_argument(iter, &opts->stack_size);
+
+  case 'w':
+    return serd_get_argument(iter, &opts->out_filename);
+
+  // Check patterns and schema inputs are skipped here and handled later
+  case 'W':
+  case 'X':
+  case 's': {
+    const char* skipped = NULL;
+    return serd_get_argument(iter, &skipped);
+  }
+
+  default:
+    ARG_ERRORF("invalid option -- '%c'\n", opt);
+    return print_usage(program, true);
+  }
+}
+
+/*
+  Program entry point: parse options, collect inputs, and run validation.
+
+  The exit status is 0 when the final status is SERD_SUCCESS or SERD_FAILURE,
+  EXIT_FAILURE when no input is given, and otherwise derived from the error
+  status.
+*/
+int
+main(int argc, char** argv)
+{
+  // Unnamed fields are zero: no base URI, no output file, not verbose/quiet
+  Options opts = {.input_options = {SERD_SYNTAX_EMPTY, 0u, false},
+                  .block_size    = 4096u,
+                  .stack_size    = 4194304u};
+
+  // Parse all command line options (which must precede inputs)
+  SerdStatus st = SERD_SUCCESS;
+  OptionIter iter = {argv, argc, 1, 1};
+  for (; !serd_option_iter_is_end(iter);) {
+    st = parse_option(&iter, &opts);
+    if (st) {
+      return (st > SERD_FAILURE);
+    }
+  }
+
+  // Every argument past the last option is an input
+  opts.inputs   = argv + iter.a;
+  opts.n_inputs = argc - iter.a;
+  if (opts.n_inputs == 0) {
+    fprintf(stderr, "%s: missing input\n", argv[0]);
+    print_usage(argv[0], true);
+    return EXIT_FAILURE;
+  }
+
+  if (!st) {
+    st = run(opts, argc, argv);
+  }
+
+  if (st <= SERD_FAILURE) {
+    return 0;
+  }
+
+  return (int)st;
+}