aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2018-05-27 15:48:25 +0200
committerDavid Robillard <d@drobilla.net>2019-04-13 19:48:23 +0200
commit25a0c50bb95f9523e700a2ca91b6b4a61c8243c5 (patch)
treeabcfe360917617b09de82ca01a4c361186cde36c
parent2afbb905c47b12dab0ceddbe8516675d291a4345 (diff)
downloadserd-25a0c50bb95f9523e700a2ca91b6b4a61c8243c5.tar.gz
serd-25a0c50bb95f9523e700a2ca91b6b4a61c8243c5.tar.bz2
serd-25a0c50bb95f9523e700a2ca91b6b4a61c8243c5.zip
WIP: Add validation
-rw-r--r--NEWS1
-rw-r--r--serd/serd.h7
-rw-r--r--src/serd_validate.c159
-rw-r--r--src/string.c1
-rw-r--r--src/validate.c866
-rw-r--r--src/world.c4
-rw-r--r--tests/serd_test.c2
-rw-r--r--wscript48
8 files changed, 1073 insertions, 15 deletions
diff --git a/NEWS b/NEWS
index 48441498..274704ba 100644
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,7 @@ serd (1.0.0) unstable;
* Simplify streaming API and improve pretty printing
* Add logging functions to public API
* Add model for storing statements in memory
+ * Add support for validation
-- David Robillard <d@drobilla.net> Sat, 19 Jan 2019 13:31:12 +0100
diff --git a/serd/serd.h b/serd/serd.h
index 6d95ad22..0b798abc 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -109,7 +109,8 @@ typedef enum {
SERD_ERR_ID_CLASH, ///< Encountered clashing blank node IDs
SERD_ERR_BAD_CURIE, ///< Invalid CURIE (e.g. prefix does not exist)
SERD_ERR_INTERNAL, ///< Unexpected internal error (should not happen)
- SERD_ERR_OVERFLOW ///< Stack overflow
+ SERD_ERR_OVERFLOW, ///< Stack overflow
+ SERD_ERR_INVALID ///< Invalid data
} SerdStatus;
/// RDF syntax type
@@ -1485,6 +1486,10 @@ SERD_API
SerdStatus
serd_model_erase_range(SerdModel* model, SerdRange* range);
+/**
+   Validate the statements in `model`.
+
+   Every statement is checked against the class, property, and datatype
+   definitions found in the same model.  Problems are reported through the
+   log of the model's world.
+
+   @return #SERD_SUCCESS if nothing was reported, otherwise #SERD_ERR_INVALID.
+*/
+SERD_API
+SerdStatus
+serd_validate(const SerdModel* model);
+
/**
@}
@name Inserter
diff --git a/src/serd_validate.c b/src/serd_validate.c
new file mode 100644
index 00000000..40da8618
--- /dev/null
+++ b/src/serd_validate.c
@@ -0,0 +1,159 @@
+/*
+ Copyright 2012-2018 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#define _BSD_SOURCE 1 // for realpath
+#define _DEFAULT_SOURCE 1 // for realpath
+
+#include "serd_config.h"
+
+#include "serd/serd.h"
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+#define CERRORF(fmt, ...) fprintf(stderr, "serd_validate: " fmt, __VA_ARGS__);
+
+/** Write the program version and license notice to stdout, return 0. */
+static int
+print_version(void)
+{
+  printf("serd_validate " SERD_VERSION " <http://drobilla.net/software/serd>\n");
+
+  static const char* const notice =
+    "Copyright 2012-2018 David Robillard <http://drobilla.net>.\n"
+    "License: <http://www.opensource.org/licenses/isc>\n"
+    "This is free software; you are free to change and redistribute it.\n"
+    "There is NO WARRANTY, to the extent permitted by law.\n";
+
+  fputs(notice, stdout);
+  return 0;
+}
+
+/** Print the command line help.
+ *
+ * @param name Program name to show in the usage line.
+ * @param error If true, print to stderr and return 1, otherwise print to
+ * stdout and return 0.
+ * @return The exit status the program should terminate with.
+ */
+static int
+print_usage(const char* name, bool error)
+{
+  FILE* const os = error ? stderr : stdout;
+  fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
+  fprintf(os, "Validate RDF data\n\n");
+  fprintf(os, " -h Display this help and exit\n");
+  // List exactly the options that main() parses: -h, -k and -v
+  fprintf(os, " -k BYTES Parser stack size\n");
+  fprintf(os, " -v Display version information and exit\n");
+  fprintf(os,
+          "Validate RDF data. This is a simple validator which checks\n"
+          "that all used properties are actually defined. It does not do\n"
+          "any fancy file retrieval, the files passed on the command line\n"
+          "are the only data that is read. In other words, you must pass\n"
+          "the definition of all vocabularies used on the command line.\n");
+  return error ? 1 : 0;
+}
+
+/** Return a newly allocated absolute version of `path`.
+ *
+ * @return A string the caller must free(), or NULL if the path could not be
+ * resolved or memory could not be allocated.
+ */
+static char*
+absolute_path(const char* path)
+{
+#ifdef _WIN32
+  char* out = (char*)malloc(MAX_PATH);
+  if (!out) {
+    return NULL;
+  }
+
+  // Zero means failure, a value >= the buffer size means truncation
+  const DWORD len = GetFullPathName(path, MAX_PATH, out, NULL);
+  if (len == 0 || len >= MAX_PATH) {
+    free(out);
+    return NULL;
+  }
+
+  return out;
+#else
+  return realpath(path, NULL);
+#endif
+}
+
+/** Complain that option `opt` lacks its argument, then print usage.
+ *
+ * @return The error exit status from print_usage().
+ */
+static int
+missing_arg(const char* name, char opt)
+{
+  fprintf(stderr,
+          "serd_validate: option requires an argument -- '%c'\n",
+          opt);
+
+  const int status = print_usage(name, true);
+  return status;
+}
+
+/** Entry point: read every input file into one model, then validate it.
+ *
+ * Options are parsed first (-h, -k BYTES, -v), the remaining arguments are
+ * read as Turtle files.  An input whose path can not be resolved is reported
+ * and skipped.  A read error stops processing with exit status 1.
+ *
+ * @return 0 on success, 1 on a usage or read error, otherwise the status
+ * returned by serd_validate().
+ */
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) {
+    return print_usage(argv[0], true);
+  }
+
+  int    a          = 1;
+  size_t stack_size = 4194304;
+  for (; a < argc && argv[a][0] == '-'; ++a) {
+    if (argv[a][1] == 'h') {
+      return print_usage(argv[0], false);
+    } else if (argv[a][1] == 'k') {
+      if (++a == argc) {
+        return missing_arg(argv[0], 'k');
+      }
+      char*      endptr = NULL;
+      const long size   = strtol(argv[a], &endptr, 10);
+      if (size <= 0 || size == LONG_MAX || *endptr != '\0') {
+        CERRORF("invalid stack size `%s'\n", argv[a]);
+        return 1;
+      }
+      stack_size = (size_t)size;
+    } else if (argv[a][1] == 'v') {
+      return print_version();
+    } else {
+      CERRORF("invalid option -- '%s'\n", argv[a] + 1);
+      return print_usage(argv[0], true);
+    }
+  }
+
+  SerdWorld*           world    = serd_world_new();
+  const SerdModelFlags indices  = SERD_INDEX_SPO | SERD_INDEX_OPS;
+  const SerdModelFlags flags    = indices | SERD_STORE_CURSORS;
+  SerdModel*           model    = serd_model_new(world, flags);
+  SerdEnv*             env      = serd_env_new(NULL);
+  SerdInserter*        inserter = serd_inserter_new(model, env, NULL);
+  SerdReader*          reader   = serd_reader_new(
+    world, SERD_TURTLE, serd_inserter_get_sink(inserter), stack_size);
+
+  // Stop at the first read error, but fall through to the shared cleanup
+  // below so nothing allocated above or in the loop is leaked
+  int exit_status = 0;
+  for (; a < argc && !exit_status; ++a) {
+    const char* input   = argv[a];
+    char*       in_path = absolute_path(input);
+
+    if (!in_path) {
+      CERRORF("unable to open file %s\n", input);
+      continue;
+    }
+
+    SerdNode* base_uri_node = serd_new_file_uri(in_path, NULL);
+
+    serd_env_set_base_uri(env, base_uri_node);
+    SerdStatus st = serd_reader_start_file(reader, input, true);
+    st = st ? st : serd_reader_read_document(reader);
+    st = st ? st : serd_reader_finish(reader);
+
+    if (st) {
+      CERRORF("error reading %s: %s\n", in_path, serd_strerror(st));
+      exit_status = 1;
+    }
+
+    serd_node_free(base_uri_node);
+    free(in_path);
+  }
+
+  serd_reader_free(reader);
+  serd_inserter_free(inserter);
+  serd_env_free(env);
+
+  if (!exit_status) {
+    exit_status = (int)serd_validate(model);
+  }
+
+  serd_model_free(model);
+  serd_world_free(world);
+
+  return exit_status;
+}
diff --git a/src/string.c b/src/string.c
index 10cd537a..c2466c48 100644
--- a/src/string.c
+++ b/src/string.c
@@ -45,6 +45,7 @@ serd_strerror(SerdStatus status)
case SERD_ERR_BAD_CURIE: return "Invalid CURIE";
case SERD_ERR_INTERNAL: return "Internal error";
case SERD_ERR_OVERFLOW: return "Stack overflow";
+ case SERD_ERR_INVALID: return "Invalid data";
}
return "Unknown error"; // never reached
}
diff --git a/src/validate.c b/src/validate.c
new file mode 100644
index 00000000..a52f92b0
--- /dev/null
+++ b/src/validate.c
@@ -0,0 +1,866 @@
+/*
+ Copyright 2012-2018 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#define _BSD_SOURCE 1 // for realpath
+#define _DEFAULT_SOURCE 1 // for realpath
+
+#include "serd_config.h"
+
+#include "model.h"
+#include "serd/serd.h"
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef HAVE_PCRE
+#include <pcre.h>
+#endif
+
+#define NS_owl "http://www.w3.org/2002/07/owl#"
+#define NS_rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+#define NS_rdfs "http://www.w3.org/2000/01/rdf-schema#"
+#define NS_xsd "http://www.w3.org/2001/XMLSchema#"
+
+/*
+  Reporting shorthands around report().  Each expands to a single expression
+  that evaluates to report()'s return value, so the caller supplies the
+  terminating semicolon and may use the result in an assignment.
+*/
+
+// Report an error with format arguments
+#define VERRORF(ctx, statement, fmt, ...) \
+  report(ctx, statement, SERD_LOG_LEVEL_ERROR, fmt, __VA_ARGS__)
+
+// Report an error with a fixed message
+#define VERROR(ctx, statement, fmt) \
+  report(ctx, statement, SERD_LOG_LEVEL_ERROR, fmt)
+
+// Report a warning with format arguments
+#define VWARNF(ctx, statement, fmt, ...) \
+  report(ctx, statement, SERD_LOG_LEVEL_WARNING, fmt, __VA_ARGS__)
+
+// Report an informational note with format arguments
+#define VNOTEF(ctx, statement, fmt, ...) \
+  report(ctx, statement, SERD_LOG_LEVEL_INFO, fmt, __VA_ARGS__)
+
+// Report an informational note with a fixed message
+#define VNOTE(ctx, statement, fmt) \
+  report(ctx, statement, SERD_LOG_LEVEL_INFO, fmt)
+
+/*
+  Loop over the statements in `range`, binding each one to `name`.
+
+  The loop advances `range` itself with serd_range_next(), and stops when the
+  range is empty or its front statement is NULL.
+*/
+#define SERD_FOREACH(name, range) \
+ for (const SerdStatement* (name) = NULL; \
+ !serd_range_empty(range) && ((name) = serd_range_front(range)); \
+ serd_range_next(range))
+
+/** Nodes for every vocabulary URI the validator looks up in the model.
+ *
+ * Each field except `sentinel` is assigned by init_uris().  serd_validate()
+ * frees the nodes by walking this struct as an array of SerdNode* until it
+ * reaches a NULL pointer, so `sentinel` must stay last and stay NULL.
+ */
+typedef struct
+{
+ SerdNode* owl_Class;
+ SerdNode* owl_DatatypeProperty;
+ SerdNode* owl_FunctionalProperty;
+ SerdNode* owl_InverseFunctionalProperty;
+ SerdNode* owl_ObjectProperty;
+ SerdNode* owl_Restriction;
+ SerdNode* owl_Thing;
+ SerdNode* owl_allValuesFrom;
+ SerdNode* owl_cardinality;
+ SerdNode* owl_equivalentClass;
+ SerdNode* owl_maxCardinality;
+ SerdNode* owl_minCardinality;
+ SerdNode* owl_onDatatype;
+ SerdNode* owl_onProperty;
+ SerdNode* owl_someValuesFrom;
+ SerdNode* owl_withRestrictions;
+ SerdNode* rdf_PlainLiteral;
+ SerdNode* rdf_Property;
+ SerdNode* rdf_first;
+ SerdNode* rdf_rest;
+ SerdNode* rdf_type;
+ SerdNode* rdfs_Class;
+ SerdNode* rdfs_Datatype;
+ SerdNode* rdfs_Literal;
+ SerdNode* rdfs_Resource;
+ SerdNode* rdfs_domain;
+ SerdNode* rdfs_label;
+ SerdNode* rdfs_range;
+ SerdNode* rdfs_subClassOf;
+ SerdNode* xsd_anyURI;
+ SerdNode* xsd_float;
+ SerdNode* xsd_decimal;
+ SerdNode* xsd_double;
+ SerdNode* xsd_maxExclusive;
+ SerdNode* xsd_maxInclusive;
+ SerdNode* xsd_minExclusive;
+ SerdNode* xsd_minInclusive;
+ SerdNode* xsd_pattern;
+ SerdNode* xsd_string;
+ SerdNode* sentinel; // Never assigned by init_uris(); NULL terminator
+} URIs;
+
+/** State shared by all checks during one serd_validate() run. */
+typedef struct
+{
+ URIs uris; // Vocabulary nodes used in model queries
+ const SerdModel* model; // Model being validated
+ unsigned n_errors; // Count maintained by report()
+ unsigned n_restrictions; // Number of restriction checks performed
+} ValidationContext;
+
+// Forward declaration: check_instance_type() calls this function, which in
+// turn reaches check_instance_type() again through check_type().
+static int
+check_class_restriction(ValidationContext* ctx,
+ const SerdNode* restriction,
+ const SerdStatement* statement,
+ const SerdNode* instance);
+
+/** Log a validation message about `statement` through the world's log.
+ *
+ * Informational notes only annotate a message that was already reported, so
+ * they are logged but not counted.  Warnings and errors both increment
+ * `ctx->n_errors`.
+ *
+ * NOTE(review): warnings still count towards the error total, which makes
+ * validation fail; confirm that this is intended.
+ *
+ * @param ctx Validation context; the message goes to `ctx->model->world`.
+ * @param statement Statement whose cursor is attached to the message.
+ * @param level Severity of the message.
+ * @param fmt printf-style format string, followed by its arguments.
+ * @return 1 if the message was counted, 0 for an informational note.
+ */
+static int
+report(ValidationContext*   ctx,
+       const SerdStatement* statement,
+       const SerdLogLevel   level,
+       const char*          fmt,
+       ...)
+{
+  va_list args;
+  va_start(args, fmt);
+  const SerdMessage msg = { SERD_ERR_INVALID,
+                            level,
+                            serd_statement_get_cursor(statement),
+                            fmt,
+                            &args };
+  serd_world_log(ctx->model->world, &msg);
+  va_end(args);
+
+  if (level == SERD_LOG_LEVEL_INFO) {
+    return 0;
+  }
+
+  ++ctx->n_errors;
+  return 1;
+}
+
+/** Count one restriction check and pass `value` through unchanged. */
+static bool
+check(ValidationContext* ctx, const bool value)
+{
+  ctx->n_restrictions += 1;
+  return value;
+}
+
+/** One link in the chain of nodes currently being explored by is_descendant. */
+typedef struct DescendantPathImpl
+{
+  const SerdNode*                  node; // Node visited at this depth
+  const struct DescendantPathImpl* up;   // Link for the caller, or NULL
+} DescendantPath;
+
+/** Return true iff `node` is already somewhere on the chain `path`. */
+static bool
+descendant_path_contains(const DescendantPath* path, const SerdNode* node)
+{
+  for (; path; path = path->up) {
+    if (serd_node_equals(path->node, node)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/** Recursive worker for is_descendant, `up` is the chain of callers. */
+static bool
+is_descendant_from(ValidationContext*    ctx,
+                   const SerdNode*       child,
+                   const SerdNode*       parent,
+                   const SerdNode*       pred,
+                   const DescendantPath* up)
+{
+  if (serd_node_equals(child, parent) ||
+      serd_model_ask(
+        ctx->model, child, ctx->uris.owl_equivalentClass, parent, NULL)) {
+    return true;
+  }
+
+  const DescendantPath here  = { child, up };
+  bool                 found = false;
+
+  SerdRange* i = serd_model_range(ctx->model, child, pred, NULL, NULL);
+  SERD_FOREACH (s, i) {
+    const SerdNode* o = serd_statement_get_object(s);
+
+    /* Skip anything already on the current path.  This covers a class that
+       is explicitly a descendant of itself, and also longer cycles, which
+       would otherwise recurse forever.  Nothing is lost: every arc of such
+       a node is already being explored by the frame that first visited it. */
+    if (descendant_path_contains(&here, o)) {
+      continue;
+    }
+
+    if (is_descendant_from(ctx, o, parent, pred, &here)) {
+      found = true;
+      break;
+    }
+  }
+  serd_range_free(i);
+
+  return found;
+}
+
+/** Return true iff `child` is a descendant of `parent` by `pred` arcs.
+ *
+ * That is, returns true if there is a path from `child` to `parent` by
+ * following `pred` arcs starting from child.  A node is also considered a
+ * descendant of itself and of anything it is an owl:equivalentClass of.
+ * Cycles in the `pred` arcs are tolerated.
+ */
+static bool
+is_descendant(ValidationContext* ctx,
+              const SerdNode*    child,
+              const SerdNode*    parent,
+              const SerdNode*    pred)
+{
+  return is_descendant_from(ctx, child, parent, pred, NULL);
+}
+
+/** Return true iff `parent` is reachable from `child` via rdfs:subClassOf. */
+static bool
+is_subclass(ValidationContext* ctx,
+            const SerdNode*    child,
+            const SerdNode*    parent)
+{
+  const SerdNode* const arc = ctx->uris.rdfs_subClassOf;
+
+  return is_descendant(ctx, child, parent, arc);
+}
+
+/** Return true iff `parent` is reachable from `child` via owl:onDatatype. */
+static bool
+is_subdatatype(ValidationContext* ctx,
+               const SerdNode*    child,
+               const SerdNode*    parent)
+{
+  const SerdNode* const arc = ctx->uris.owl_onDatatype;
+
+  return is_descendant(ctx, child, parent, arc);
+}
+
+/** Return true iff the whole of `str` matches the regular expression `pat`.
+ *
+ * Without PCRE support nothing is checked and every string matches.
+ *
+ * @param ctx Validation context, used for error reporting.
+ * @param pat_statement Statement that defines the pattern; errors in the
+ * pattern itself are reported against it.
+ * @param pat Pattern to match.
+ * @param str String to test.
+ * @return False if `str` does not match, or if the pattern could not be
+ * prepared (an error is reported in that case).
+ */
+static bool
+regexp_match(ValidationContext*   ctx,
+             const SerdStatement* pat_statement,
+             const char*          pat,
+             const char*          str)
+{
+#ifdef HAVE_PCRE
+  // Append a $ to the pattern so we only match if the entire string matches
+  const size_t len  = strlen(pat);
+  char* const  regx = (char*)malloc(len + 2);
+  if (!regx) {
+    VERROR(ctx, pat_statement, "Failed to allocate memory for pattern\n");
+    return false;
+  }
+
+  memcpy(regx, pat, len);
+  regx[len]     = '$';
+  regx[len + 1] = '\0';
+
+  const char* err       = NULL;
+  int         erroffset = 0;
+  pcre* re = pcre_compile(regx, PCRE_ANCHORED, &err, &erroffset, NULL);
+  free(regx);
+  if (!re) {
+    VERRORF(ctx,
+            pat_statement,
+            "Error in pattern \"%s\" at offset %d (%s)\n",
+            pat,
+            erroffset,
+            err);
+    return false;
+  }
+
+  // A negative result is either "no match" or a matching error
+  const bool ret =
+    pcre_exec(re, NULL, str, (int)strlen(str), 0, 0, NULL, 0) >= 0;
+
+  pcre_free(re);
+  return ret;
+#else
+  (void)ctx;
+  (void)pat_statement;
+  (void)pat;
+  (void)str;
+#endif // HAVE_PCRE
+  return true;
+}
+
+/** Compare `literal` against the restriction bound `bound`.
+ *
+ * If `type` derives from xsd:decimal, xsd:double or xsd:float, both values
+ * are parsed as numbers and compared numerically.  xsd:float is a separate
+ * primitive type, so it has to be tested on its own or float literals would
+ * be compared as text.  Any other type is compared with strcmp().
+ *
+ * @return A negative, zero, or positive value if `literal` is less than,
+ * equal to, or greater than `bound`, respectively.
+ */
+static int
+bound_cmp(ValidationContext* ctx,
+          const SerdNode*    literal,
+          const SerdNode*    type,
+          const SerdNode*    bound)
+{
+  const char* str       = serd_node_get_string(literal);
+  const char* bound_str = serd_node_get_string(bound);
+  const bool  is_numeric =
+    (is_subdatatype(ctx, type, ctx->uris.xsd_decimal) ||
+     is_subdatatype(ctx, type, ctx->uris.xsd_double) ||
+     is_subdatatype(ctx, type, ctx->uris.xsd_float));
+
+  if (is_numeric) {
+    const double fbound   = serd_strtod(bound_str, NULL);
+    const double fliteral = serd_strtod(str, NULL);
+    return ((fliteral < fbound) ? -1 : (fliteral > fbound) ? 1 : 0);
+  }
+
+  return strcmp(str, bound_str);
+}
+
+/** Check `literal` against a single datatype restriction node.
+ *
+ * The xsd:pattern, xsd:minInclusive, xsd:maxInclusive, xsd:minExclusive and
+ * xsd:maxExclusive facets of `restriction` are checked in that order, and
+ * checking stops at the first violation.  Each facet that is present counts
+ * as exactly one restriction check.  Any other facet is ignored.
+ *
+ * @param ctx Validation context.
+ * @param statement Statement that uses the literal; errors are reported
+ * against it.
+ * @param literal Literal node to check.
+ * @param type Datatype that decides how bounds are compared.
+ * @param restriction Node that carries the facets.
+ * @return False if a facet is violated (an error has been reported).
+ */
+static bool
+check_literal_restriction(ValidationContext*   ctx,
+                          const SerdStatement* statement,
+                          const SerdNode*      literal,
+                          const SerdNode*      type,
+                          const SerdNode*      restriction)
+{
+  const char* str = serd_node_get_string(literal);
+
+  // Check xsd:pattern
+  const SerdStatement* pat_statement = serd_model_get_statement(
+    ctx->model, restriction, ctx->uris.xsd_pattern, 0, 0);
+  if (pat_statement) {
+    const SerdNode* pat_node = serd_statement_get_object(pat_statement);
+    const char*     pat      = serd_node_get_string(pat_node);
+    if (check(ctx, !regexp_match(ctx, pat_statement, pat, str))) {
+      VERRORF(ctx,
+              statement,
+              "Value \"%s\" does not match pattern \"%s\"\n",
+              str,
+              pat);
+      return false;
+    }
+  }
+
+  // Check xsd:minInclusive
+  const SerdNode* lower = serd_model_get(
+    ctx->model, restriction, ctx->uris.xsd_minInclusive, 0, 0);
+  if (lower) {
+    if (check(ctx, bound_cmp(ctx, literal, type, lower) < 0)) {
+      VERRORF(ctx,
+              statement,
+              "Value \"%s\" < minimum \"%s\"\n",
+              str,
+              serd_node_get_string(lower));
+      return false;
+    }
+  }
+
+  // Check xsd:maxInclusive
+  const SerdNode* upper = serd_model_get(
+    ctx->model, restriction, ctx->uris.xsd_maxInclusive, 0, 0);
+  if (upper) {
+    if (check(ctx, bound_cmp(ctx, literal, type, upper) > 0)) {
+      VERRORF(ctx,
+              statement,
+              "Value \"%s\" > than maximum \"%s\"\n",
+              str,
+              serd_node_get_string(upper));
+      return false;
+    }
+  }
+
+  // Check xsd:minExclusive
+  const SerdNode* elower = serd_model_get(
+    ctx->model, restriction, ctx->uris.xsd_minExclusive, 0, 0);
+  if (elower) {
+    if (check(ctx, bound_cmp(ctx, literal, type, elower) <= 0)) {
+      VERRORF(ctx,
+              statement,
+              "Value \"%s\" <= exclusive minimum \"%s\"\n",
+              str,
+              serd_node_get_string(elower));
+      return false;
+    }
+  }
+
+  // Check xsd:maxExclusive (check() has already counted it, as for the others)
+  const SerdNode* eupper = serd_model_get(
+    ctx->model, restriction, ctx->uris.xsd_maxExclusive, 0, 0);
+  if (eupper) {
+    if (check(ctx, bound_cmp(ctx, literal, type, eupper) >= 0)) {
+      VERRORF(ctx,
+              statement,
+              "Value \"%s\" >= exclusive maximum \"%s\"\n",
+              str,
+              serd_node_get_string(eupper));
+      return false;
+    }
+  }
+
+  return true; // No known facet was violated, unknown ones are tolerated
+}
+
+/** Return true iff some rdf:type of `dtype` derives from rdfs:Datatype. */
+static bool
+is_datatype(ValidationContext* ctx, const SerdNode* dtype)
+{
+  bool       defined = false;
+  SerdRange* types =
+    serd_model_range(ctx->model, dtype, ctx->uris.rdf_type, NULL, NULL);
+
+  SERD_FOREACH (type_statement, types) {
+    const SerdNode* candidate = serd_statement_get_object(type_statement);
+    if (is_subdatatype(ctx, candidate, ctx->uris.rdfs_Datatype)) {
+      defined = true; // Subdatatype of rdfs:Datatype
+      break;
+    }
+  }
+
+  serd_range_free(types);
+  return defined;
+}
+
+/** Return true iff `literal` satisfies `type` and all of its base datatypes.
+ *
+ * Starting at `type`, each level first requires the literal's own datatype
+ * (if any) to be defined, then checks every entry of the level's
+ * owl:withRestrictions list, and finally moves on to the owl:onDatatype
+ * parent.  A NULL `type` is trivially valid.
+ */
+static bool
+literal_is_valid(ValidationContext*   ctx,
+                 const SerdStatement* statement,
+                 const SerdNode*      literal,
+                 const SerdNode*      type)
+{
+  const SerdNode* level = type;
+
+  while (level) {
+    // The literal's datatype must be defined in the model
+    const SerdNode* const own_type = serd_node_get_datatype(literal);
+    if (own_type && !is_datatype(ctx, own_type)) {
+      VERRORF(ctx,
+              statement,
+              "Datatype <%s> is not defined\n",
+              serd_node_get_string(own_type));
+      return false;
+    }
+
+    // Walk the restriction list of this level, one list cell at a time
+    const SerdNode* cell = serd_model_get(
+      ctx->model, level, ctx->uris.owl_withRestrictions, 0, 0);
+    for (;;) {
+      if (!cell) {
+        break;
+      }
+
+      SerdIter* item =
+        serd_model_find(ctx->model, cell, ctx->uris.rdf_first, 0, 0);
+      if (!item) {
+        break;
+      }
+
+      const SerdNode* restriction =
+        serd_statement_get_object(serd_iter_get(item));
+
+      const bool satisfied = check_literal_restriction(
+        ctx, statement, literal, level, restriction);
+      if (!satisfied) {
+        VNOTEF(ctx,
+               serd_iter_get(item),
+               "Restriction on <%s>\n",
+               serd_node_get_string(level));
+        serd_iter_free(item);
+        return false;
+      }
+
+      cell = serd_model_get(ctx->model, cell, ctx->uris.rdf_rest, 0, 0);
+      serd_iter_free(item);
+    }
+
+    // Continue with the base datatype, if there is one
+    level = serd_model_get(ctx->model, level, ctx->uris.owl_onDatatype, 0, 0);
+  }
+
+  return true;
+}
+
+/** Return true iff the model directly states `subject` rdf:type `type`. */
+static bool
+is_a(ValidationContext* ctx, const SerdNode* subject, const SerdNode* type)
+{
+  const SerdNode* const rdf_type = ctx->uris.rdf_type;
+
+  return serd_model_ask(ctx->model, subject, rdf_type, type, 0);
+}
+
+/** Return true iff `node` is typed as `klass` or as one of its subclasses. */
+static bool
+has_explicit_type(ValidationContext* ctx,
+                  const SerdNode*    node,
+                  const SerdNode*    klass)
+{
+  // Directly stated to be an instance
+  if (is_a(ctx, node, klass)) {
+    return true;
+  }
+
+  bool       typed = false;
+  SerdRange* types =
+    serd_model_range(ctx->model, node, ctx->uris.rdf_type, NULL, NULL);
+
+  SERD_FOREACH (type_statement, types) {
+    const SerdNode* stated = serd_statement_get_object(type_statement);
+    if (is_subclass(ctx, stated, klass)) {
+      typed = true; // Explicit instance of a subclass
+      break;
+    }
+  }
+
+  serd_range_free(types);
+  return typed;
+}
+
+/** Return true iff `node` may be treated as an instance of `klass`.
+ *
+ * Nodes that can not be judged are accepted: those that never occur as a
+ * subject in the model, and blank nodes.
+ */
+static bool
+is_instance_of(ValidationContext* ctx,
+               const SerdNode*    node,
+               const SerdNode*    klass)
+{
+  /* Nothing about this node known in the model at all, assume it is some
+     external resource we can't validate. */
+  const bool described = serd_model_ask(ctx->model, node, NULL, NULL, NULL);
+  if (!described) {
+    return true;
+  }
+
+  /* Be permissive for blank nodes and don't require explicit type
+     annotation, to avoid countless errors with things like lists. */
+  if (serd_node_get_type(node) == SERD_BLANK) {
+    return true;
+  }
+
+  if (has_explicit_type(ctx, node, klass)) {
+    return true;
+  }
+
+  // Everything is an instance of the universal classes
+  if (serd_node_equals(klass, ctx->uris.rdfs_Resource)) {
+    return true;
+  }
+
+  return serd_node_equals(klass, ctx->uris.owl_Thing);
+}
+
+/** Check that `node` can be used where an instance of `klass` is expected.
+ *
+ * Returns false, after an error has been reported against `statement`, if
+ * `klass` is a literal class or datatype, if `klass` is an owl:Restriction
+ * that `node` violates, if the same check fails for any superclass of
+ * `klass`, or if `node` is not an instance of `klass`.
+ *
+ * NOTE(review): the recursion follows rdfs:subClassOf and only skips a class
+ * that is directly its own superclass, so a longer subclass cycle looks like
+ * it would recurse without bound -- confirm.
+ */
+static bool
+check_instance_type(ValidationContext* ctx,
+ const SerdStatement* statement,
+ const SerdNode* node,
+ const SerdNode* klass)
+{
+ // A non-literal node can never satisfy a literal class or a datatype
+ if (is_subclass(ctx, klass, ctx->uris.rdfs_Literal) ||
+ is_a(ctx, klass, ctx->uris.rdfs_Datatype)) {
+ VERROR(ctx, statement, "Class instance found where literal expected\n");
+ return false;
+ }
+
+ // check_class_restriction() returns non-zero if it reported a violation
+ if (is_a(ctx, klass, ctx->uris.owl_Restriction)) {
+ if (check_class_restriction(ctx, klass, statement, node)) {
+ return false;
+ }
+ }
+
+ // The node must also be acceptable for every superclass
+ SerdRange* r = serd_model_range(
+ ctx->model, klass, ctx->uris.rdfs_subClassOf, NULL, NULL);
+ SERD_FOREACH (s, r) {
+ const SerdNode* super = serd_statement_get_object(s);
+ if (!serd_node_equals(super, klass) &&
+ !check_instance_type(ctx, statement, node, super)) {
+ serd_range_free(r);
+ return false;
+ }
+ }
+ serd_range_free(r);
+
+ if (!is_instance_of(ctx, node, klass)) {
+ VERRORF(ctx,
+ statement,
+ "Node %s is not an instance of %s\n",
+ serd_node_get_string(node),
+ serd_node_get_string(klass));
+ return false;
+ }
+
+ return true;
+}
+
+/** Check that `node` is acceptable as a value of `type`.
+ *
+ * rdfs:Resource and owl:Thing accept everything.  Literals are checked
+ * against literal classes and datatypes, URI nodes are checked as instances
+ * unless `type` derives from xsd:anyURI, and all other nodes are checked as
+ * instances.
+ *
+ * @return False if the check failed; an error has been reported against
+ * `statement` in that case.
+ */
+static bool
+check_type(ValidationContext* ctx,
+ const SerdStatement* statement,
+ const SerdNode* node,
+ const SerdNode* type)
+{
+ if (serd_node_equals(type, ctx->uris.rdfs_Resource) ||
+ serd_node_equals(type, ctx->uris.owl_Thing)) {
+ return true; // Trivially true for everything (more or less)
+ }
+
+ if (serd_node_get_type(node) == SERD_LITERAL) {
+ if (serd_node_equals(type, ctx->uris.rdfs_Literal)) {
+ return true; // Trivially true for a literal
+ } else if (serd_node_equals(type, ctx->uris.rdf_PlainLiteral)) {
+ // A plain literal must not carry a datatype
+ if (serd_node_get_datatype(node)) {
+ VERRORF(ctx,
+ statement,
+ "Literal \"%s\" should be plain, but has datatype "
+ "<%s>\n",
+ serd_node_get_string(node),
+ serd_node_get_string(serd_node_get_datatype(node)));
+ return false;
+ }
+ } else if (!is_a(ctx, type, ctx->uris.rdfs_Datatype)) {
+ // Expected type is not a datatype, so a literal can not satisfy it
+ VERRORF(ctx,
+ statement,
+ "Literal \"%s\" where instance of <%s> expected\n",
+ serd_node_get_string(node),
+ serd_node_get_string(type));
+ return false;
+ } else {
+ return literal_is_valid(ctx, statement, node, type);
+ }
+ } else if (serd_node_get_type(node) == SERD_URI) {
+ if (!is_subdatatype(ctx, type, ctx->uris.xsd_anyURI)) {
+ // Only check if type is not anyURI, since node is a URI
+ return check_instance_type(ctx, statement, node, type);
+ }
+ } else {
+ return check_instance_type(ctx, statement, node, type);
+ }
+
+ // Reached for a valid plain literal, or a URI node where anyURI is expected
+ return true;
+}
+
+/** Count the statements in `i` whose `field` node is not a blank node.
+ *
+ * The range `i` is advanced to its end by this function.
+ */
+static uint64_t
+count_non_blanks(SerdRange* i, SerdField field)
+{
+  uint64_t count = 0;
+
+  SERD_FOREACH (statement, i) {
+    const bool is_blank =
+      serd_node_get_type(serd_statement_get_node(statement, field)) ==
+      SERD_BLANK;
+
+    count += is_blank ? 0u : 1u;
+  }
+
+  return count;
+}
+
+/** Check a single statement against the definitions in the model.
+ *
+ * The following is checked, in order: the subject's type if this is an
+ * rdf:type statement, that the predicate has an rdfs:label, that a literal
+ * object is valid for its datatype, the constraints of the predicate's
+ * property type, and finally every rdfs:range and rdfs:domain of the
+ * predicate.
+ *
+ * @return Non-zero if one of the direct checks in this function failed.
+ * Failures found by check_type() are only visible through `ctx->n_errors`.
+ */
+static int
+check_statement(ValidationContext* ctx, const SerdStatement* statement)
+{
+  int             st   = 0;
+  const URIs*     uris = &ctx->uris;
+  const SerdNode* subj = serd_statement_get_subject(statement);
+  const SerdNode* pred = serd_statement_get_predicate(statement);
+  const SerdNode* obj  = serd_statement_get_object(statement);
+
+  if (serd_node_equals(pred, uris->rdf_type)) {
+    // Type statement, check that object is a valid instance of type
+    check_type(ctx, statement, subj, obj);
+  }
+
+  if (!serd_model_ask(ctx->model, pred, uris->rdfs_label, 0, 0)) {
+    // Warn if property has no label
+    st = VWARNF(ctx,
+                statement,
+                "Property <%s> has no label\n",
+                serd_node_get_string(pred));
+  }
+
+  if (serd_node_get_type(obj) == SERD_LITERAL &&
+      !literal_is_valid(ctx, statement, obj, serd_node_get_datatype(obj))) {
+    st = SERD_ERR_INVALID;
+  }
+
+  // Check restrictions based on property type (only the first match applies)
+  if (is_a(ctx, pred, uris->owl_DatatypeProperty)) {
+    if (serd_node_get_type(obj) != SERD_LITERAL) {
+      st = VERROR(ctx, statement,
+                  "Datatype property with non-literal value\n");
+    }
+  } else if (is_a(ctx, pred, uris->owl_ObjectProperty)) {
+    if (serd_node_get_type(obj) == SERD_LITERAL) {
+      st = VERROR(ctx, statement, "Object property with literal value\n");
+    }
+  } else if (is_a(ctx, pred, uris->owl_FunctionalProperty)) {
+    SerdRange*     o = serd_model_range(ctx->model, subj, pred, NULL, NULL);
+    const uint64_t n = count_non_blanks(o, SERD_OBJECT);
+    if (n > 1) {
+      // The count is 64-bit, so it is cast to match the conversion
+      st = VERRORF(ctx, statement,
+                   "Functional property with %llu objects\n",
+                   (unsigned long long)n);
+    }
+    serd_range_free(o);
+  } else if (is_a(ctx, pred, uris->owl_InverseFunctionalProperty)) {
+    SerdRange*     s = serd_model_range(ctx->model, NULL, pred, obj, NULL);
+    const uint64_t n = count_non_blanks(s, SERD_SUBJECT);
+    if (n > 1) {
+      st = VERRORF(ctx, statement,
+                   "Inverse functional property with %llu subjects\n",
+                   (unsigned long long)n);
+    }
+    serd_range_free(s);
+  } else {
+    // No specific property type, so require at least some kind of property
+    SerdRange* t = serd_model_range(ctx->model, pred, uris->rdf_type, 0, 0);
+
+    bool is_property = false;
+    SERD_FOREACH (s, t) {
+      const SerdNode* type = serd_statement_get_object(s);
+      if (is_subclass(ctx, type, uris->rdf_Property)) {
+        is_property = true;
+        break;
+      }
+    }
+
+    if (!is_property) {
+      st = VERROR(ctx, statement, "Use of undefined property\n");
+    }
+
+    serd_range_free(t);
+  }
+
+  // Check range
+  SerdRange* r = serd_model_range(ctx->model, pred, uris->rdfs_range, 0, 0);
+  SERD_FOREACH (s, r) {
+    const SerdNode* range = serd_statement_get_object(s);
+    if (!has_explicit_type(ctx, obj, range) &&
+        !check_type(ctx, statement, obj, range)) {
+      VNOTEF(ctx, serd_range_front(r),
+             "In range of <%s>\n", serd_node_get_string(pred));
+    }
+  }
+  serd_range_free(r);
+
+  // Check domain
+  SerdRange* d = serd_model_range(ctx->model, pred, uris->rdfs_domain, 0, 0);
+  SERD_FOREACH (s, d) {
+    const SerdNode* domain = serd_statement_get_object(s);
+    if (!has_explicit_type(ctx, subj, domain) &&
+        !check_type(ctx, statement, subj, domain)) {
+      VNOTEF(ctx, serd_range_front(d),
+             "In domain of <%s>\n", serd_node_get_string(pred));
+    }
+  }
+  serd_range_free(d);
+
+  return st;
+}
+
+/** Report a violated cardinality restriction.
+ *
+ * An error is reported against `statement`, followed by a note that points
+ * at `restriction_statement`.
+ *
+ * @param property Property whose number of values is wrong.
+ * @param actual_values Number of values found.
+ * @param comparison Operator text placed between the two numbers.
+ * @param expected_values Number of values the restriction requires.
+ * @return The result of reporting the error.
+ */
+static int
+cardinality_error(ValidationContext*   ctx,
+                  const SerdStatement* statement,
+                  const SerdStatement* restriction_statement,
+                  const SerdNode*      property,
+                  const uint64_t       actual_values,
+                  const char*          comparison,
+                  const uint64_t       expected_values)
+{
+  // The counts are 64-bit, so they are cast to match the conversions
+  const int st = VERRORF(ctx,
+                         statement,
+                         "Property <%s> has %llu %s %llu values\n",
+                         serd_node_get_string(property),
+                         (unsigned long long)actual_values,
+                         comparison,
+                         (unsigned long long)expected_values);
+  VNOTE(ctx, restriction_statement, "Restriction here\n");
+  return st;
+}
+
+/** Check `instance` against the owl:Restriction node `restriction`.
+ *
+ * The restriction applies to the property given by its owl:onProperty; a
+ * restriction without one is ignored.  The owl:cardinality,
+ * owl:minCardinality, owl:maxCardinality, owl:someValuesFrom and
+ * owl:allValuesFrom constraints are checked, each only if present.
+ *
+ * @param statement Statement that most errors are reported against.
+ * @return Non-zero if a constraint was violated, 0 otherwise.
+ */
+static int
+check_class_restriction(ValidationContext* ctx,
+ const SerdNode* restriction,
+ const SerdStatement* statement,
+ const SerdNode* instance)
+{
+ int st = 0;
+ const SerdNode* prop = serd_model_get(
+ ctx->model, restriction, ctx->uris.owl_onProperty, NULL, NULL);
+ if (!prop) {
+ return 0;
+ }
+
+ // Number of statements about the instance that use the property
+ const uint64_t values =
+ serd_model_count(ctx->model, instance, prop, NULL, NULL);
+
+ // Check exact cardinality
+ const SerdStatement* c = serd_model_get_statement(
+ ctx->model, restriction, ctx->uris.owl_cardinality, NULL, NULL);
+ if (c) {
+ const SerdNode* card = serd_statement_get_object(c);
+ const uint64_t count = strtoul(serd_node_get_string(card), NULL, 10);
+ if (check(ctx, values != count)) {
+ st = cardinality_error(
+ ctx, statement, c, prop, values, "!=", count);
+ }
+ }
+
+ // Check minimum cardinality
+ const SerdStatement* l = serd_model_get_statement(
+ ctx->model, restriction, ctx->uris.owl_minCardinality, NULL, NULL);
+ if (l) {
+ const SerdNode* card = serd_statement_get_object(l);
+ const uint64_t count = strtoul(serd_node_get_string(card), NULL, 10);
+ if (check(ctx, values < count)) {
+ st = cardinality_error(ctx, statement, l, prop, values, "<", count);
+ }
+ }
+
+ // Check maximum cardinality
+ const SerdStatement* u = serd_model_get_statement(
+ ctx->model, restriction, ctx->uris.owl_maxCardinality, NULL, NULL);
+ if (u) {
+ const SerdNode* card = serd_statement_get_object(u);
+ const uint64_t count = strtoul(serd_node_get_string(card), NULL, 10);
+ if (check(ctx, values > count)) {
+ st = cardinality_error(ctx, statement, u, prop, values, ">", count);
+ }
+ }
+
+ // Check someValuesFrom
+ const SerdStatement* s = serd_model_get_statement(
+ ctx->model, restriction, ctx->uris.owl_someValuesFrom, 0, 0);
+ if (s) {
+ const SerdNode* some = serd_statement_get_object(s);
+
+ // NOTE(review): check_type() reports an error for each value that does not
+ // match, even when a later value does match -- confirm this is intended
+ SerdRange* v = serd_model_range(ctx->model, instance, prop, NULL, NULL);
+ bool found = false;
+ SERD_FOREACH (i, v) {
+ const SerdNode* value = serd_statement_get_object(i);
+ if (check_type(ctx, statement, value, some)) {
+ found = true;
+ break;
+ }
+ }
+ if (check(ctx, !found)) {
+ st = VERRORF(ctx,
+ statement,
+ "%s has no <%s> values of type <%s>\n",
+ serd_node_get_string(instance),
+ serd_node_get_string(prop),
+ serd_node_get_string(some));
+ VNOTE(ctx, s, "Restriction here\n");
+ }
+ serd_range_free(v);
+ }
+
+ // Check allValuesFrom
+ const SerdStatement* a = serd_model_get_statement(
+ ctx->model, restriction, ctx->uris.owl_allValuesFrom, 0, 0);
+ if (a) {
+ // Counted by hand, since this constraint does not go through check()
+ ++ctx->n_restrictions;
+ const SerdNode* all = serd_statement_get_object(a);
+
+ SerdRange* v = serd_model_range(ctx->model, instance, prop, NULL, NULL);
+ SERD_FOREACH (i, v) {
+ const SerdNode* value = serd_statement_get_object(i);
+ if (!check_type(ctx, statement, value, all)) {
+ st = VERRORF(ctx,
+ i,
+ "<%s> value not of type <%s>\n",
+ serd_node_get_string(prop),
+ serd_node_get_string(all));
+ VNOTE(ctx, a, "Restriction here\n");
+ break;
+ }
+ }
+ serd_range_free(v);
+ }
+
+ return st;
+}
+
+/** Create a node for every vocabulary URI in `uris`.
+ *
+ * The `sentinel` field is set to NULL here, so the free loop in
+ * serd_validate() terminates even if the caller did not clear the struct.
+ */
+static void
+init_uris(URIs* uris)
+{
+// Assign uris->prefix_suffix a new URI node for that term in namespace prefix
+#define URI(prefix, suffix) \
+  uris->prefix##_##suffix = serd_new_uri(NS_##prefix #suffix)
+
+  URI(owl, Class);
+  URI(owl, DatatypeProperty);
+  URI(owl, FunctionalProperty);
+  URI(owl, InverseFunctionalProperty);
+  URI(owl, ObjectProperty);
+  URI(owl, Restriction);
+  URI(owl, Thing);
+  URI(owl, allValuesFrom);
+  URI(owl, cardinality);
+  URI(owl, equivalentClass);
+  URI(owl, maxCardinality);
+  URI(owl, minCardinality);
+  URI(owl, onDatatype);
+  URI(owl, onProperty);
+  URI(owl, someValuesFrom);
+  URI(owl, withRestrictions);
+  URI(rdf, PlainLiteral);
+  URI(rdf, Property);
+  URI(rdf, first);
+  URI(rdf, rest);
+  URI(rdf, type);
+  URI(rdfs, Class);
+  URI(rdfs, Datatype);
+  URI(rdfs, Literal);
+  URI(rdfs, Resource);
+  URI(rdfs, domain);
+  URI(rdfs, label);
+  URI(rdfs, range);
+  URI(rdfs, subClassOf);
+  URI(xsd, anyURI);
+  URI(xsd, float);
+  URI(xsd, decimal);
+  URI(xsd, double);
+  URI(xsd, maxExclusive);
+  URI(xsd, maxInclusive);
+  URI(xsd, minExclusive);
+  URI(xsd, minInclusive);
+  URI(xsd, pattern);
+  URI(xsd, string);
+
+#undef URI
+
+  uris->sentinel = NULL;
+}
+
+/** Validate every statement in `model`.
+ *
+ * Problems are reported through the log of the model's world, and a summary
+ * line is printed to stdout.  Without PCRE support, a warning is printed to
+ * stderr since patterns can not be checked.
+ *
+ * @return SERD_SUCCESS if no statement check failed and nothing was counted
+ * as an error, otherwise SERD_ERR_INVALID.
+ */
+SerdStatus
+serd_validate(const SerdModel* model)
+{
+  ValidationContext ctx;
+  memset(&ctx, 0, sizeof(ValidationContext));
+  init_uris(&ctx.uris);
+
+  ctx.model          = model;
+  ctx.n_errors       = 0;
+  ctx.n_restrictions = 0;
+
+#ifndef HAVE_PCRE
+  fprintf(stderr, "warning: Built without PCRE, datatypes not checked.\n");
+#endif
+
+  int        st = 0;
+  SerdRange* i  = serd_model_all(ctx.model);
+  SERD_FOREACH (statement, i) {
+    st = check_statement(&ctx, statement) || st;
+  }
+  serd_range_free(i);
+
+  // Both counters are unsigned, so they are printed with %u
+  printf("Found %u errors (checked %u restrictions)\n",
+         ctx.n_errors,
+         ctx.n_restrictions);
+
+  // The URIs struct is walked as an array of nodes, up to its NULL sentinel
+  for (SerdNode** n = (SerdNode**)&ctx.uris; *n; ++n) {
+    serd_node_free(*n);
+  }
+
+  return !st && ctx.n_errors == 0 ? SERD_SUCCESS : SERD_ERR_INVALID;
+}
diff --git a/src/world.c b/src/world.c
index 13b593b2..6209fb0a 100644
--- a/src/world.c
+++ b/src/world.c
@@ -53,10 +53,12 @@ serd_world_fopen(SerdWorld* world, const char* path, const char* mode)
SerdStatus
serd_world_log(const SerdWorld* world, const SerdMessage* msg)
{
+ static const char* level_strings[] = { "note", "warning", "error" };
+
if (world->msg_func) {
world->msg_func(world->msg_handle, msg);
} else {
- fprintf(stderr, "error: ");
+ fprintf(stderr, "%s: ", level_strings[msg->level]);
if (msg->cursor) {
fprintf(stderr,
"%s:%u:%u: ",
diff --git a/tests/serd_test.c b/tests/serd_test.c
index f923a9c0..933ea9a5 100644
--- a/tests/serd_test.c
+++ b/tests/serd_test.c
@@ -247,7 +247,7 @@ main(void)
const char* msg = NULL;
assert(!strcmp((msg = serd_strerror(SERD_SUCCESS)), "Success"));
- for (int i = SERD_FAILURE; i <= SERD_ERR_OVERFLOW; ++i) {
+ for (int i = SERD_FAILURE; i <= SERD_ERR_INVALID; ++i) {
msg = serd_strerror((SerdStatus)i);
assert(strcmp(msg, "Success"));
}
diff --git a/wscript b/wscript
index f047e25f..8e61d5c8 100644
--- a/wscript
+++ b/wscript
@@ -31,6 +31,7 @@ def options(ctx):
'no-shared': 'do not build shared library',
'static-progs': 'build programs as static binaries',
'largefile': 'build with large file support on 32-bit systems',
+ 'no-pcre': 'do not use PCRE, even if present',
'no-posix': 'do not use POSIX functions, even if present'})
def configure(conf):
@@ -63,6 +64,21 @@ def configure(conf):
defines = ['_POSIX_C_SOURCE=200809L'],
mandatory = False)
+ if not Options.options.no_pcre:
+ autowaf.check_pkg(conf, 'libpcre', uselib_store='PCRE', mandatory=False)
+
+ if conf.env.HAVE_PCRE:
+ if conf.check(cflags=['-pthread'], mandatory=False):
+ conf.env.PTHREAD_CFLAGS = ['-pthread']
+ if conf.env.CC_NAME != 'clang':
+ conf.env.PTHREAD_LINKFLAGS = ['-pthread']
+ elif conf.check(linkflags=['-lpthread'], mandatory=False):
+ conf.env.PTHREAD_CFLAGS = []
+ conf.env.PTHREAD_LINKFLAGS = ['-lpthread']
+ else:
+ conf.env.PTHREAD_CFLAGS = []
+ conf.env.PTHREAD_LINKFLAGS = []
+
autowaf.set_lib_env(conf, 'serd', SERD_VERSION)
conf.write_config_header('serd_config.h', remove=False)
@@ -92,6 +108,7 @@ lib_source = ['src/base64.c',
'src/syntax.c',
'src/system.c',
'src/uri.c',
+ 'src/validate.c',
'src/world.c',
'src/writer.c',
'src/zix/btree.c',
@@ -112,6 +129,7 @@ def build(bld):
'includes': ['.', './src'],
'cflags': ['-fvisibility=hidden'],
'lib': ['m'],
+ 'use': ['PCRE'],
'vnum': SERD_VERSION,
'install_path': '${LIBDIR}'}
if bld.env.MSVC_COMPILER:
@@ -172,18 +190,24 @@ def build(bld):
# Utilities
if bld.env.BUILD_UTILS:
- obj = bld(features = 'c cprogram',
- source = 'src/serdi.c',
- target = 'serdi',
- includes = ['.', './src'],
- use = 'libserd',
- lib = lib_args['lib'],
- install_path = '${BINDIR}')
- if not bld.env.BUILD_SHARED or bld.env.STATIC_PROGS:
- obj.use = 'libserd_static'
- if bld.env.STATIC_PROGS:
- obj.env.SHLIB_MARKER = obj.env.STLIB_MARKER
- obj.linkflags = ['-static']
+ for i in ['serdi', 'serd_validate']:
+ obj = bld(features = 'c cprogram',
+ source = 'src/%s.c' % i,
+ target = i,
+ includes = ['.', './src'],
+ use = 'libserd',
+ lib = lib_args['lib'],
+ linkflags = [],
+ install_path = '${BINDIR}')
+ if not bld.env.BUILD_SHARED or bld.env.STATIC_PROGS:
+ obj.use = 'libserd_static'
+ if bld.env.STATIC_PROGS:
+ obj.env.SHLIB_MARKER = obj.env.STLIB_MARKER
+ obj.linkflags += ['-static']
+ if i == 'serd_validate':
+ autowaf.use_lib(bld, obj, 'PCRE')
+ obj.cflags = bld.env.PTHREAD_CFLAGS
+ obj.linkflags += bld.env.PTHREAD_LINKFLAGS
# Documentation
autowaf.build_dox(bld, 'SERD', SERD_VERSION, top, out)