aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2018-05-27 15:48:25 +0200
committer David Robillard <d@drobilla.net> 2022-01-28 21:57:29 -0500
commit f93a441065a611cc32874dde67e53a8295c87baf (patch)
tree 3793e2df1c365bf93fbe700c9428b54668f08c53 /src
parent 2c5bee49b4494e172c4fa147af91bad199ed9362 (diff)
download serd-f93a441065a611cc32874dde67e53a8295c87baf.tar.gz
serd-f93a441065a611cc32874dde67e53a8295c87baf.tar.bz2
serd-f93a441065a611cc32874dde67e53a8295c87baf.zip
[WIP] Add validation
Diffstat (limited to 'src')
-rw-r--r-- src/log.c 8
-rw-r--r-- src/string.c 2
-rw-r--r-- src/validate.c 2042
3 files changed, 2051 insertions, 1 deletions
diff --git a/src/log.c b/src/log.c
index dd62e5cc..41b4c8e5 100644
--- a/src/log.c
+++ b/src/log.c
@@ -158,8 +158,14 @@ serd_vxlogf(const SerdWorld* const world,
// Format and print the message itself
vfprintf(stderr, fmt, args);
- fprintf(stderr, "\n");
+ // Print clang-tidy-style check name (validation errors)
+ const char* const check = get_log_field(n_fields, fields, "SERD_CHECK");
+ if (check) {
+ fprintf(stderr, " [%s]", check);
+ }
+
+ fprintf(stderr, "\n");
return SERD_SUCCESS;
}
diff --git a/src/string.c b/src/string.c
index 1b9bb64a..77cd0191 100644
--- a/src/string.c
+++ b/src/string.c
@@ -75,6 +75,8 @@ serd_strerror(const SerdStatus status)
return "Invalid or unresolved URI";
case SERD_BAD_WRITE:
return "Error writing to file";
+ case SERD_BAD_DATA:
+ return "Invalid data";
}
return "Unknown error";
diff --git a/src/validate.c b/src/validate.c
new file mode 100644
index 00000000..b300c0af
--- /dev/null
+++ b/src/validate.c
@@ -0,0 +1,2042 @@
+/*
+ Copyright 2012-2021 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "memory.h"
+
+#include "exess/exess.h"
+#include "rerex/rerex.h"
+#include "serd/serd.h"
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define NS_owl "http://www.w3.org/2002/07/owl#"
+#define NS_rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+#define NS_rdfs "http://www.w3.org/2000/01/rdf-schema#"
+#define NS_xsd "http://www.w3.org/2001/XMLSchema#"
+
+/*
+  Iterate over each statement in the cursor `range`.
+
+  Declares `name` as a `const SerdStatement*` visible in the loop body.  The
+  loop ends when the cursor is at its end or serd_cursor_get() returns NULL.
+  The cursor is advanced in place and is not freed by the macro.
+*/
+#define SERD_FOREACH(name, range) \
+ for (const SerdStatement*(name) = NULL; \
+ !serd_cursor_is_end(range) && ((name) = serd_cursor_get(range)); \
+ serd_cursor_advance(range))
+
+/*
+  Iterate over the `field` node of each statement in the cursor `range`.
+
+  Declares `name` as a `const SerdNode*` visible in the loop body.  The loop
+  ends when the cursor is at its end or the statement has no node in `field`
+  (serd_statement_node() returns NULL).  The cursor is advanced in place and is
+  not freed by the macro.
+*/
+#define SERD_FOREACH_NODE(field, name, range) \
+ for (const SerdNode*(name) = NULL; \
+ !serd_cursor_is_end(range) && \
+ ((name) = serd_statement_node( \
+ (const SerdStatement* SERD_NONNULL)serd_cursor_get(range), field)); \
+ serd_cursor_advance(range))
+
+// printf-style format for one NodeQuote; pair each use with one QUOTE_ARGS()
+#define QUOTE_FMT "%s%s%s"
+// Expands to the three strings of a NodeQuote.  The argument is evaluated
+// three times, so passing a call like format_node(...) runs it three times.
+#define QUOTE_ARGS(quote) quote.prefix.buf, quote.string.buf, quote.suffix.buf
+
+// Number of entries in check_names
+#define N_CHECKS 34
+
+/*
+  Name of each check as shown in log output (the "SERD_CHECK" log field).
+
+  vreportf() indexes this table directly with a SerdValidatorCheck value, so
+  the order of the entries must match the numeric values of that enumeration
+  (declared outside this file).
+*/
+static const char* const check_names[N_CHECKS] = {
+ "nothing", //
+ "allValuesFrom", //
+ "anyUri", //
+ "cardinalityEqual", //
+ "cardinalityMax", //
+ "cardinalityMin", //
+ "classCycle", //
+ "classLabel", //
+ "datatypeCycle", //
+ "datatypeProperty", //
+ "datatypeType", //
+ "deprecatedClass", //
+ "deprecatedProperty", //
+ "explicitInstanceType", //
+ "functionalProperty", //
+ "instanceLiteral", //
+ "instanceType", //
+ "inverseFunctionalProperty", //
+ "literalInstance", //
+ "literalMaxExclusive", //
+ "literalMaxInclusive", //
+ "literalMinExclusive", //
+ "literalMinInclusive", //
+ "literalPattern", //
+ "literalRestriction", //
+ "literalValue", //
+ "objectProperty", //
+ "plainLiteralDatatype", //
+ "predicateType", //
+ "propertyCycle", //
+ "propertyDomain", //
+ "propertyLabel", //
+ "propertyRange", //
+ "someValuesFrom", //
+};
+
+/// Set of enabled checks: bit `1 << check` is set for each enabled
+/// SerdValidatorCheck value
+typedef uint64_t SerdValidatorChecks;
+
+/// Number of property values, or a cardinality bound (read with strtoul() and
+/// printed with "%lu")
+typedef unsigned long Count;
+
+/*
+  Nodes for the OWL, RDF, RDFS and XSD terms that the validator looks up.
+
+  Each field is named prefix_localName after the term it refers to.  The
+  pointers are only read in the code visible here; where they are created and
+  who owns them is not shown in this part of the file.
+*/
+typedef struct {
+ const SerdNode* owl_Class;
+ const SerdNode* owl_DatatypeProperty;
+ const SerdNode* owl_DeprecatedClass;
+ const SerdNode* owl_DeprecatedProperty;
+ const SerdNode* owl_FunctionalProperty;
+ const SerdNode* owl_InverseFunctionalProperty;
+ const SerdNode* owl_ObjectProperty;
+ const SerdNode* owl_Restriction;
+ const SerdNode* owl_Thing;
+ const SerdNode* owl_allValuesFrom;
+ const SerdNode* owl_cardinality;
+ const SerdNode* owl_deprecated;
+ const SerdNode* owl_equivalentClass;
+ const SerdNode* owl_maxCardinality;
+ const SerdNode* owl_minCardinality;
+ const SerdNode* owl_onDatatype;
+ const SerdNode* owl_onProperty;
+ const SerdNode* owl_someValuesFrom;
+ const SerdNode* owl_unionOf;
+ const SerdNode* owl_withRestrictions;
+ const SerdNode* rdf_PlainLiteral;
+ const SerdNode* rdf_Property;
+ const SerdNode* rdf_XMLLiteral;
+ const SerdNode* rdf_first;
+ const SerdNode* rdf_rest;
+ const SerdNode* rdf_type;
+ const SerdNode* rdfs_Class;
+ const SerdNode* rdfs_Datatype;
+ const SerdNode* rdfs_Literal;
+ const SerdNode* rdfs_Resource;
+ const SerdNode* rdfs_domain;
+ const SerdNode* rdfs_label;
+ const SerdNode* rdfs_range;
+ const SerdNode* rdfs_subClassOf;
+ const SerdNode* rdfs_subPropertyOf;
+ const SerdNode* xsd_anyURI;
+ const SerdNode* xsd_maxExclusive;
+ const SerdNode* xsd_maxInclusive;
+ const SerdNode* xsd_minExclusive;
+ const SerdNode* xsd_minInclusive;
+ const SerdNode* xsd_pattern;
+} URIs;
+
+/// Validator state (fields are described by how this file uses them)
+struct SerdValidatorImpl {
+ const SerdWorld* world; // Passed to the logging functions
+ const SerdEnv* env; // Used by format_node() to abbreviate URIs
+ const SerdModel* model; // Model that all checks query
+ const SerdNode* graph; // Graph argument for queries in top-level checks
+ const SerdNode* true_node; // NOTE(review): unused in the code visible here
+ URIs uris; // Vocabulary nodes used in queries
+ SerdValidatorChecks checks; // Enabled checks, tested as `1ull << check`
+ unsigned n_errors; // Failed checks counted by report_check()
+ unsigned n_checks; // Enabled, unsuppressed checks seen by report_check()
+ bool suppressed; // If true, nothing is logged or counted
+};
+
+// NOTE(review): not referenced in the part of the file visible here; check
+// names are looked up in check_names instead -- confirm whether it is needed
+typedef struct {
+ const char* name;
+} Check;
+
+// Forward declaration: called by check_instance_union_type() and
+// check_instance_super_types(), which are defined before it
+static SerdStatus
+check_instance_type(SerdValidator* ctx,
+ SerdValidatorCheck check,
+ const SerdNode* root_klass,
+ const SerdStatement* statement,
+ const SerdNode* instance,
+ const SerdNode* klass);
+
+// Forward declaration: called by check_instance_type(), which is defined
+// before it
+static SerdStatus
+check_instance_restriction(SerdValidator* ctx,
+ const SerdNode* root_klass,
+ const SerdStatement* statement,
+ const SerdNode* instance,
+ const SerdNode* restriction);
+
+// A node split into three strings for printing with QUOTE_FMT / QUOTE_ARGS,
+// as produced by format_node()
+typedef struct {
+ SerdStringView prefix; // Opening quote/bracket, or the namespace prefix
+ SerdStringView string; // Node text, or ":" for an abbreviated URI
+ SerdStringView suffix; // Closing quote/bracket, or the local name
+} NodeQuote;
+
+/**
+   Split `node` into three strings for printing in a log message.
+
+   The result is meant to be printed with QUOTE_FMT and QUOTE_ARGS.  Literals
+   are wrapped in double quotes.  URIs are written as prefix:suffix when
+   serd_env_qualify() returns zero for them, and as <uri> otherwise.  Blank
+   nodes get a "_:" prefix and variables a "?" prefix.  Any other node type
+   yields three empty strings.
+*/
+static NodeQuote
+format_node(const SerdValidator* const ctx, const SerdNode* const node)
+{
+  const SerdStringView none  = SERD_EMPTY_STRING();
+  const SerdStringView text  = serd_node_string_view(node);
+  NodeQuote            quote = {none, none, none};
+
+  switch (serd_node_type(node)) {
+  case SERD_LITERAL:
+    quote.prefix = SERD_STRING("\"");
+    quote.string = text;
+    quote.suffix = SERD_STRING("\"");
+    return quote;
+
+  case SERD_URI: {
+    SerdStringView ns   = none;
+    SerdStringView name = none;
+
+    if (serd_env_qualify(ctx->env, text, &ns, &name)) {
+      // Could not be abbreviated, so show the full URI in angle brackets
+      quote.prefix = SERD_STRING("<");
+      quote.string = text;
+      quote.suffix = SERD_STRING(">");
+    } else {
+      quote.prefix = ns;
+      quote.string = SERD_STRING(":");
+      quote.suffix = name;
+    }
+
+    return quote;
+  }
+
+  case SERD_BLANK:
+    quote.prefix = SERD_STRING("_:");
+    quote.string = text;
+    return quote;
+
+  case SERD_VARIABLE:
+    quote.prefix = SERD_STRING("?");
+    quote.string = text;
+    return quote;
+  }
+
+  return quote;
+}
+
+/**
+   Log a validator message with structured fields attached.
+
+   The message always carries a "SERD_STATUS" field holding the numeric value
+   of SERD_BAD_DATA and a "SERD_CHECK" field holding the name of `check`.  If
+   `statement` is non-NULL and has a caret, "SERD_FILE", "SERD_LINE" and
+   "SERD_COL" fields are passed as well.
+
+   @param ctx Validator whose world is used for logging.
+   @param check Check that the message belongs to (index into check_names).
+   @param level Log level of the message.
+   @param statement Statement the message is about, or NULL.
+   @param fmt printf-style format string.
+   @param args Arguments for `fmt`.
+*/
+SERD_LOG_FUNC(5, 0)
+static void
+vreportf(SerdValidator* const       ctx,
+         const SerdValidatorCheck   check,
+         const SerdLogLevel         level,
+         const SerdStatement* const statement,
+         const char* const          fmt,
+         va_list                    args)
+{
+  char status_field[12] = {0};
+  snprintf(status_field, sizeof(status_field), "%d", SERD_BAD_DATA);
+
+  const char* file_field     = NULL;
+  char        line_field[24] = {0};
+  char        col_field[24]  = {0};
+  unsigned    n_fields       = 2u; // Only status and check unless located
+
+  const SerdCaret* const caret =
+    statement ? serd_statement_caret(statement) : NULL;
+
+  if (caret) {
+    file_field = serd_node_string(serd_caret_name(caret));
+    snprintf(line_field, sizeof(line_field), "%u", serd_caret_line(caret));
+    snprintf(col_field, sizeof(col_field), "%u", serd_caret_column(caret));
+    n_fields = 5u;
+  }
+
+  const SerdLogField fields[] = {{"SERD_STATUS", status_field},
+                                 {"SERD_CHECK", check_names[check]},
+                                 {"SERD_FILE", file_field},
+                                 {"SERD_LINE", line_field},
+                                 {"SERD_COL", col_field}};
+
+  serd_vxlogf(ctx->world, level, n_fields, fields, fmt, args);
+}
+
+/**
+   Record the outcome of one check and log an error if it failed.
+
+   While reporting is suppressed, nothing is counted or logged and the return
+   value reflects `condition` alone.  Otherwise a disabled check always returns
+   SERD_SUCCESS, and an enabled one is counted, and logged as an error when
+   `condition` is false.
+
+   @param ctx Validator state (counters, enabled checks, suppression flag).
+   @param statement Statement used to locate the message, or NULL.
+   @param check Check being evaluated.
+   @param condition True if the check passed.
+   @param fmt printf-style format for the error message.
+   @return SERD_SUCCESS if the check passed or is disabled, else SERD_BAD_DATA.
+*/
+SERD_LOG_FUNC(5, 6)
+static SerdStatus
+report_check(SerdValidator* const       ctx,
+             const SerdStatement* const statement,
+             const SerdValidatorCheck   check,
+             const bool                 condition,
+             const char* const          fmt,
+             ...)
+{
+  const SerdStatus result = condition ? SERD_SUCCESS : SERD_BAD_DATA;
+
+  if (ctx->suppressed) {
+    return result;
+  }
+
+  if (!(ctx->checks & (1ull << check))) {
+    return SERD_SUCCESS;
+  }
+
+  ++ctx->n_checks;
+
+  if (!condition) {
+    ++ctx->n_errors;
+
+    va_list args;
+    va_start(args, fmt);
+    vreportf(ctx, check, SERD_LOG_LEVEL_ERROR, statement, fmt, args);
+    va_end(args);
+  }
+
+  return result;
+}
+
+/**
+   Log a notice that adds context to a previously reported error.
+
+   Does nothing while reporting is suppressed.  Unlike report_check(), this
+   does not look at whether `check` is enabled and does not touch any counter.
+*/
+SERD_LOG_FUNC(4, 5)
+static void
+log_note(SerdValidator* const       ctx,
+         const SerdStatement* const statement,
+         const SerdValidatorCheck   check,
+         const char* const          fmt,
+         ...)
+{
+  if (ctx->suppressed) {
+    return;
+  }
+
+  va_list args;
+  va_start(args, fmt);
+  vreportf(ctx, check, SERD_LOG_LEVEL_NOTICE, statement, fmt, args);
+  va_end(args);
+}
+
+/*
+  Return true iff `parent` can be reached from `child` by `pred` arcs.
+
+  A node counts as its own descendant, and so does a node that is directly
+  declared owl:equivalentClass to `parent`.  Otherwise every object of a
+  (`child`, `pred`, ?) statement is searched recursively.
+
+  NOTE(review): only an arc from a node straight back to itself is skipped, so
+  a longer cycle of `pred` arcs that never reaches `parent` looks like it
+  would recurse without bound -- confirm that callers rule this out.
+*/
+static bool
+is_descendant(SerdValidator* const  ctx,
+              const SerdNode* const child,
+              const SerdNode* const parent,
+              const SerdNode* const pred)
+{
+  if (serd_node_equals(child, parent)) {
+    return true;
+  }
+
+  if (serd_model_ask(
+        ctx->model, child, ctx->uris.owl_equivalentClass, parent, NULL)) {
+    return true;
+  }
+
+  bool              found = false;
+  SerdCursor* const arcs =
+    serd_model_find(ctx->model, child, pred, NULL, NULL);
+
+  SERD_FOREACH_NODE (SERD_OBJECT, target, arcs) {
+    if (serd_node_equals(child, target)) {
+      continue; // Skip trivial self-reference
+    }
+
+    if (is_descendant(ctx, target, parent, pred)) {
+      found = true;
+      break;
+    }
+  }
+
+  serd_cursor_free(arcs);
+  return found;
+}
+
+// Return true iff `klass` is `super` itself, or reaches it by following
+// rdfs:subClassOf arcs
+static bool
+is_subclass(SerdValidator* const  ctx,
+            const SerdNode* const klass,
+            const SerdNode* const super)
+{
+  if (serd_node_equals(klass, super)) {
+    return true;
+  }
+
+  return is_descendant(ctx, klass, super, ctx->uris.rdfs_subClassOf);
+}
+
+// Return true iff `datatype` is `super` itself, or reaches it by following
+// owl:onDatatype arcs
+static bool
+is_subdatatype(SerdValidator* const  ctx,
+               const SerdNode* const datatype,
+               const SerdNode* const super)
+{
+  if (serd_node_equals(datatype, super)) {
+    return true;
+  }
+
+  return is_descendant(ctx, datatype, super, ctx->uris.owl_onDatatype);
+}
+
+// Raise `*old` to `next` if `next` is the numerically greater status
+static void
+update_status(SerdStatus* const old, const SerdStatus next)
+{
+  if (next > *old) {
+    *old = next;
+  }
+}
+
+// Return whichever of the two statuses is numerically greater
+static SerdStatus
+merge_status(const SerdStatus a, const SerdStatus b)
+{
+  if (a > b) {
+    return a;
+  }
+
+  return b;
+}
+
+/**
+   Compare `literal` with `bound`, both interpreted as values of `type`.
+
+   If exess does not recognise `type`, the two strings are compared with
+   strcmp().  Otherwise both are parsed as that datatype (the bound first) and
+   the parsed values are compared.
+
+   If either string fails to parse, an error is logged at the corresponding
+   statement and the logging call's result, reduced to 0 or 1, is returned in
+   place of a comparison result.
+
+   @return Negative, zero or positive, as for strcmp().
+*/
+static int
+bound_cmp(SerdValidator* const       ctx,
+          const SerdStatement* const literal_statement,
+          const SerdNode* const      literal,
+          const SerdNode* const      type,
+          const SerdStatement* const bound_statement,
+          const SerdNode* const      bound)
+{
+  const char* const   literal_str = serd_node_string(literal);
+  const char* const   bound_str   = serd_node_string(bound);
+  const ExessDatatype datatype =
+    exess_datatype_from_uri(serd_node_string(type));
+
+  if (datatype == EXESS_NOTHING) {
+    return strcmp(literal_str, bound_str);
+  }
+
+  // Parse the bound first so a bad restriction is reported before a bad value
+  ExessValue                rhs = {false};
+  const ExessVariableResult rhs_result =
+    exess_read_value(datatype, sizeof(rhs), &rhs, bound_str);
+
+  if (rhs_result.status) {
+    return !!serd_logf_at(ctx->world,
+                          SERD_LOG_LEVEL_ERROR,
+                          serd_statement_caret(bound_statement),
+                          "Failed to parse bound literal \"%s\" (%s)",
+                          bound_str,
+                          exess_strerror(rhs_result.status));
+  }
+
+  ExessValue                lhs = {false};
+  const ExessVariableResult lhs_result =
+    exess_read_value(datatype, sizeof(lhs), &lhs, literal_str);
+
+  if (lhs_result.status) {
+    return !!serd_logf_at(ctx->world,
+                          SERD_LOG_LEVEL_ERROR,
+                          serd_statement_caret(literal_statement),
+                          "Failed to parse literal \"%s\" (%s)",
+                          literal_str,
+                          exess_strerror(lhs_result.status));
+  }
+
+  return exess_value_compare(datatype,
+                             lhs_result.write_count,
+                             &lhs,
+                             datatype,
+                             rhs_result.write_count,
+                             &rhs);
+}
+
+// Return true iff the comparison result `cmp` means "less than"
+static bool
+less(const int cmp)
+{
+  return 0 > cmp;
+}
+
+// Return true iff the comparison result `cmp` means "less than or equal"
+static bool
+less_equal(const int cmp)
+{
+  return !(cmp > 0);
+}
+
+// Return true iff the comparison result `cmp` means "greater than"
+static bool
+greater(const int cmp)
+{
+  return 0 < cmp;
+}
+
+// Return true iff the comparison result `cmp` means "greater than or equal"
+static bool
+greater_equal(const int cmp)
+{
+  return !(cmp < 0);
+}
+
+/**
+   Check `literal` against one bound of a datatype restriction.
+
+   Looks up the first (`restriction`, `bound_property`, ?) statement.  If there
+   is none, the restriction does not set this bound and the check passes.
+   Otherwise the literal is compared with the bound and `pred` decides whether
+   the comparison result is acceptable.
+
+   @param check Check to report the result under.
+   @param statement Statement containing the literal, for error location.
+   @param literal Literal being validated.
+   @param type Datatype used to interpret the literal and the bound.
+   @param restriction Restriction node that may carry the bound.
+   @param bound_property Property naming the bound, such as xsd:minInclusive.
+   @param pred Maps the comparison result to pass (true) or fail (false).
+   @param message Relation shown between value and bound in the error text.
+   @return SERD_SUCCESS, or the failure status from report_check().
+*/
+static SerdStatus
+check_bound(SerdValidator* const       ctx,
+            const SerdValidatorCheck   check,
+            const SerdStatement* const statement,
+            const SerdNode* const      literal,
+            const SerdNode* const      type,
+            const SerdNode* const      restriction,
+            const SerdNode* const      bound_property,
+            bool (*pred)(int),
+            const char* const message)
+{
+  SerdCursor* const cursor =
+    serd_model_find(ctx->model, restriction, bound_property, 0, 0);
+
+  if (!serd_cursor_is_end(cursor)) {
+    const SerdStatement* const bound_statement =
+      (const SerdStatement* SERD_NONNULL)serd_cursor_get(cursor);
+
+    const SerdNode* const bound = serd_statement_object(bound_statement);
+
+    const int cmp =
+      bound_cmp(ctx, statement, literal, type, bound_statement, bound);
+
+    serd_cursor_free(cursor);
+
+    return report_check(ctx,
+                        statement,
+                        check,
+                        pred(cmp),
+                        "Value \"%s\" %s \"%s\"",
+                        serd_node_string(literal),
+                        message,
+                        serd_node_string(bound));
+  }
+
+  // No such bound on this restriction
+  serd_cursor_free(cursor);
+  return SERD_SUCCESS;
+}
+
+/**
+   Compile `regex`, logging an error if it is invalid.
+
+   @param world World used for logging.
+   @param pattern_statement Statement the pattern came from, used to locate the
+   error message, or NULL.
+   @param regex Pattern source text.
+   @return A new pattern that the caller frees with rerex_free_pattern(), or
+   NULL if compilation failed.
+*/
+static RerexPattern*
+parse_regex(const SerdWorld* const     world,
+            const SerdStatement* const pattern_statement,
+            const char* const          regex)
+{
+  const SerdCaret* const caret =
+    pattern_statement ? serd_statement_caret(pattern_statement) : NULL;
+
+  RerexPattern*     pattern = NULL;
+  size_t            offset  = 0;
+  const RerexStatus status  = rerex_compile(regex, &offset, &pattern);
+
+  if (!status) {
+    return pattern;
+  }
+
+  serd_logf_at(world,
+               SERD_LOG_LEVEL_ERROR,
+               caret,
+               "Error in pattern \"%s\" at offset %lu (%s)",
+               regex,
+               (unsigned long)offset,
+               rerex_strerror(status));
+
+  return NULL;
+}
+
+/**
+   Return true iff `str` matches the pattern `regex`.
+
+   The pattern is compiled anew on every call.  A pattern that fails to compile
+   is reported by parse_regex() and treated as a failed match.
+*/
+static bool
+regex_match(SerdValidator* const       ctx,
+            const SerdStatement* const pattern_statement,
+            const char* const          regex,
+            const char* const          str)
+{
+  bool                matched = false;
+  RerexPattern* const pattern =
+    parse_regex(ctx->world, pattern_statement, regex);
+
+  if (pattern) {
+    RerexMatcher* const matcher = rerex_new_matcher(pattern);
+
+    matched = rerex_match(matcher, str);
+
+    rerex_free_matcher(matcher);
+    rerex_free_pattern(pattern);
+  }
+
+  return matched;
+}
+
+/**
+   Check `literal` against a single datatype restriction node.
+
+   Applies the xsd:pattern of `restriction` if it has one, then the
+   min/max inclusive/exclusive bounds.  Every facet is checked even if an
+   earlier one fails, so each violation is reported.
+
+   @param statement Statement containing the literal, for error location.
+   @param literal Literal being validated.
+   @param type Datatype used to interpret the literal for bound comparison.
+   @param restriction Restriction node carrying the facets.
+   @return The numerically greatest status of all the individual checks.
+*/
+static SerdStatus
+check_literal_restriction(SerdValidator* const       ctx,
+                          const SerdStatement* const statement,
+                          const SerdNode* const      literal,
+                          const SerdNode* const      type,
+                          const SerdNode* const      restriction)
+{
+  typedef bool (*BoundCmpPredicate)(int);
+
+  typedef struct {
+    SerdValidatorCheck check_id;
+    const SerdNode*    restriction_property;
+    BoundCmpPredicate  pred;
+    const char* const  message;
+  } BoundCheck;
+
+  SerdStatus st = SERD_SUCCESS;
+
+  // Check xsd:pattern
+  const SerdStatement* const pattern_statement = serd_model_get_statement(
+    ctx->model, restriction, ctx->uris.xsd_pattern, 0, 0);
+
+  if (pattern_statement) {
+    const char* const value = serd_node_string(literal);
+    const char* const pattern =
+      serd_node_string(serd_statement_object(pattern_statement));
+
+    const SerdStatus pattern_st =
+      report_check(ctx,
+                   statement,
+                   SERD_CHECK_LITERAL_PATTERN,
+                   regex_match(ctx, pattern_statement, pattern, value),
+                   "Value \"%s\" doesn't match pattern \"%s\"",
+                   value,
+                   pattern);
+
+    st = merge_status(st, pattern_st);
+  }
+
+  // Each bound pairs the predicate the value must satisfy with the relation
+  // that is printed when it does not
+  const BoundCheck bound_checks[] = {
+    {SERD_CHECK_LITERAL_MIN_INCLUSIVE,
+     ctx->uris.xsd_minInclusive,
+     greater_equal,
+     "<"},
+    {SERD_CHECK_LITERAL_MAX_INCLUSIVE,
+     ctx->uris.xsd_maxInclusive,
+     less_equal,
+     ">"},
+    {SERD_CHECK_LITERAL_MIN_EXCLUSIVE,
+     ctx->uris.xsd_minExclusive,
+     greater,
+     "<="},
+    {SERD_CHECK_LITERAL_MAX_EXCLUSIVE, ctx->uris.xsd_maxExclusive, less, ">="},
+  };
+
+  const size_t n_bound_checks = sizeof(bound_checks) / sizeof(bound_checks[0]);
+
+  for (const BoundCheck* b = bound_checks; b < bound_checks + n_bound_checks;
+       ++b) {
+    const SerdStatus bound_st = check_bound(ctx,
+                                            b->check_id,
+                                            statement,
+                                            literal,
+                                            type,
+                                            restriction,
+                                            b->restriction_property,
+                                            b->pred,
+                                            b->message);
+
+    st = merge_status(st, bound_st);
+  }
+
+  return st;
+}
+
+/*
+  Return true iff `literal` is a valid value of the datatype `type`.
+
+  A NULL `type` is always valid.  Otherwise this checks, in order, that the
+  literal's own datatype (if any) is declared as an rdfs:Datatype in the model,
+  that the literal parses as that datatype when exess knows it, and that the
+  literal meets every restriction in the owl:withRestrictions list of `type`.
+  It then recurses on the owl:onDatatype of `type`, if there is one.
+
+  The first two checks depend only on `literal`, so they are repeated (and
+  counted again by report_check) at every level of the recursion.
+
+  As the FIXME below says, a cycle of owl:onDatatype arcs is not detected.
+*/
+static bool
+literal_is_valid(SerdValidator* const ctx,
+ const SerdStatement* const statement,
+ const SerdNode* const literal,
+ const SerdNode* const type)
+{
+ if (!type) {
+ return true;
+ }
+
+ // Check that datatype is defined
+ const SerdNode* const node_datatype = serd_node_datatype(literal);
+ if (node_datatype && report_check(ctx,
+ statement,
+ SERD_CHECK_DATATYPE_TYPE,
+ serd_model_ask(ctx->model,
+ node_datatype,
+ ctx->uris.rdf_type,
+ ctx->uris.rdfs_Datatype,
+ NULL),
+ "Undefined datatype <%s>",
+ serd_node_string(node_datatype))) {
+ return false;
+ }
+
+ const NodeQuote type_quote = format_node(ctx, type);
+
+ const ExessDatatype value_type =
+ node_datatype ? exess_datatype_from_uri(serd_node_string(node_datatype))
+ : EXESS_NOTHING;
+
+ if (value_type != EXESS_NOTHING) {
+ /* Check if the literal parses correctly by measuring the canonical string.
+ This is better than trying to read a variant here, because it
+ automatically supports some unbounded datatypes like xsd:decimal and
+ xsd:base64Binary without needing to allocate space for the value. */
+
+ const ExessResult r =
+ exess_write_canonical(serd_node_string(literal), value_type, 0, NULL);
+
+ // The datatype URI is printed without its leading EXESS_XSD_URI part, so
+ // only the local name follows "xsd:" in the message
+ if (report_check(ctx,
+ statement,
+ SERD_CHECK_LITERAL_VALUE,
+ r.status == EXESS_SUCCESS,
+ "Invalid xsd:%s literal \"%s\" (%s)",
+ serd_node_string(node_datatype) + sizeof(EXESS_XSD_URI) -
+ 1,
+ serd_node_string(literal),
+ exess_strerror(r.status))) {
+ return false;
+ }
+ }
+
+ // Find restrictions list
+ const SerdNode* head =
+ serd_model_get(ctx->model, type, ctx->uris.owl_withRestrictions, 0, 0);
+
+ // Walk list, checking each restriction
+ while (head) {
+ SerdCursor* const i_first =
+ serd_model_find(ctx->model, head, ctx->uris.rdf_first, 0, 0);
+
+ // A list node with no rdf:first ends the walk
+ if (serd_cursor_is_end(i_first)) {
+ serd_cursor_free(i_first);
+ break;
+ }
+
+ const SerdStatement* const s_first =
+ (const SerdStatement* SERD_NONNULL)serd_cursor_get(i_first);
+
+ const SerdNode* const first = serd_statement_object(s_first);
+
+ // Check this restriction
+ if (check_literal_restriction(ctx, statement, literal, type, first)) {
+ // Point at the restriction that was violated, then give up
+ log_note(ctx,
+ s_first,
+ SERD_CHECK_LITERAL_RESTRICTION,
+ "Restriction on datatype " QUOTE_FMT,
+ QUOTE_ARGS(type_quote));
+ serd_cursor_free(i_first);
+ return false;
+ }
+
+ // Seek to next list node
+ head = serd_model_get(ctx->model, head, ctx->uris.rdf_rest, 0, 0);
+ serd_cursor_free(i_first);
+ }
+
+ // Recurse up datatype hierarchy
+ const SerdNode* const super =
+ serd_model_get(ctx->model, type, ctx->uris.owl_onDatatype, 0, 0);
+
+ // FIXME: check for cycles
+ return super ? literal_is_valid(ctx, statement, literal, super) : true;
+}
+
+/**
+   Return true iff `node` is declared to be an instance of `type`.
+
+   This holds if the model contains (`node`, rdf:type, `type`), or if any
+   rdf:type of `node` is a subclass of `type` according to is_subclass().
+*/
+static bool
+is_a(SerdValidator* const  ctx,
+     const SerdNode* const node,
+     const SerdNode* const type)
+{
+  // Fast path: the instance explicitly has this type
+  if (serd_model_ask(ctx->model, node, ctx->uris.rdf_type, type, 0)) {
+    return true;
+  }
+
+  bool              found = false;
+  SerdCursor* const declared_types =
+    serd_model_find(ctx->model, node, ctx->uris.rdf_type, NULL, NULL);
+
+  SERD_FOREACH_NODE (SERD_OBJECT, declared, declared_types) {
+    if (is_subclass(ctx, declared, type)) {
+      found = true; // The instance explicitly has a subtype of this type
+      break;
+    }
+  }
+
+  serd_cursor_free(declared_types);
+  return found;
+}
+
+/**
+   Check `instance` against a class that is an owl:unionOf other classes.
+
+   If `klass` has no owl:unionOf list, this does nothing and returns
+   SERD_SUCCESS.  Otherwise each list element is tried in turn with reporting
+   suppressed, and the check passes as soon as the instance is valid for one
+   of them.  If no element accepts the instance, one error is reported under
+   `check`.
+
+   The suppression flag is restored to its previous value after each trial
+   rather than cleared, so that a union nested inside another suppressed
+   trial does not switch reporting back on for the enclosing one.
+
+   @param check Check to report a failure under.
+   @param root_klass Class the overall check started from, used in messages.
+   @param statement Statement being checked, for error location.
+   @param instance Node being checked.
+   @param klass Class that may be defined as a union.
+   @return SERD_SUCCESS if there is no union or some element matches, else
+   the failure status from report_check().
+*/
+static SerdStatus
+check_instance_union_type(SerdValidator* const       ctx,
+                          const SerdValidatorCheck   check,
+                          const SerdNode* const      root_klass,
+                          const SerdStatement* const statement,
+                          const SerdNode* const      instance,
+                          const SerdNode* const      klass)
+{
+  const SerdNode* const union_list =
+    serd_model_get(ctx->model, klass, ctx->uris.owl_unionOf, NULL, NULL);
+
+  if (!union_list) {
+    return SERD_SUCCESS;
+  }
+
+  for (const SerdNode* l = union_list; l;
+       l = serd_model_get(ctx->model, l, ctx->uris.rdf_rest, NULL, NULL)) {
+    const SerdNode* const element =
+      serd_model_get(ctx->model, l, ctx->uris.rdf_first, NULL, NULL);
+
+    if (!element) {
+      continue;
+    }
+
+    // Try this element quietly: a mismatch here is not an error by itself
+    const bool was_suppressed = ctx->suppressed;
+    ctx->suppressed           = true;
+
+    const SerdStatus st = check_instance_type(
+      ctx, check, root_klass, statement, instance, element);
+
+    ctx->suppressed = was_suppressed;
+
+    if (!st) {
+      return SERD_SUCCESS;
+    }
+  }
+
+  return report_check(ctx,
+                      statement,
+                      check,
+                      false,
+                      "Instance " QUOTE_FMT " is not any type in union",
+                      QUOTE_ARGS(format_node(ctx, instance)));
+}
+
+/**
+   Check `instance` against every direct superclass of `klass`.
+
+   Superclasses equal to `klass` itself, rdfs:Class or owl:Class are skipped.
+   Checking stops at the first superclass that the instance violates.  If that
+   superclass is a URI node, a note explaining the subclass relationship is
+   logged at the rdfs:subClassOf statement.
+
+   @return SERD_SUCCESS if no superclass is violated, else SERD_BAD_DATA.
+*/
+static SerdStatus
+check_instance_super_types(SerdValidator* const       ctx,
+                           const SerdValidatorCheck   check,
+                           const SerdStatement* const statement,
+                           const SerdNode* const      instance,
+                           const SerdNode* const      klass)
+{
+  SerdStatus        st = SERD_SUCCESS;
+  SerdCursor* const supers =
+    serd_model_find(ctx->model, klass, ctx->uris.rdfs_subClassOf, NULL, NULL);
+
+  SERD_FOREACH_NODE (SERD_OBJECT, super, supers) {
+    if (serd_node_equals(klass, super) ||
+        serd_node_equals(super, ctx->uris.rdfs_Class) ||
+        serd_node_equals(super, ctx->uris.owl_Class)) {
+      continue;
+    }
+
+    if (!check_instance_type(ctx, check, klass, statement, instance, super)) {
+      continue;
+    }
+
+    if (serd_node_type(super) == SERD_URI) {
+      log_note(ctx,
+               serd_cursor_get(supers),
+               check,
+               "A " QUOTE_FMT " is a " QUOTE_FMT,
+               QUOTE_ARGS(format_node(ctx, klass)),
+               QUOTE_ARGS(format_node(ctx, super)));
+    }
+
+    st = SERD_BAD_DATA;
+    break;
+  }
+
+  serd_cursor_free(supers);
+  return st;
+}
+
+/**
+   Check that `instance` is a valid instance of `klass`.
+
+   The steps, in order:
+   - rdfs:Resource and owl:Thing accept everything.
+   - For xsd:anyURI (or a datatype derived from it), the instance must be a
+     URI node; nothing else is checked in that case.
+   - `klass` must not be a literal class (a subclass of rdfs:Literal or an
+     rdfs:Datatype), since `instance` is not a literal.
+   - The instance must meet `klass` as a restriction, as a union, and all of
+     its superclasses.
+   - Finally, if the explicit instance type check is enabled and the instance
+     is not a blank node, it must be declared to have the type.
+
+   @param check Check under which union and superclass failures are reported.
+   @param root_klass Class the overall check started from, used in messages.
+   @param statement Statement being checked, for error location.
+   @param instance Node being checked.
+   @param klass Class to check the instance against.
+   @return SERD_SUCCESS if no step fails, else a failure status.
+*/
+static SerdStatus
+check_instance_type(SerdValidator* const       ctx,
+                    const SerdValidatorCheck   check,
+                    const SerdNode* const      root_klass,
+                    const SerdStatement* const statement,
+                    const SerdNode* const      instance,
+                    const SerdNode* const      klass)
+{
+  SerdStatus st = SERD_SUCCESS;
+
+  // Every instance is inherently a rdfs:Resource and owl:Thing
+  if (serd_node_equals(klass, ctx->uris.rdfs_Resource) ||
+      serd_node_equals(klass, ctx->uris.owl_Thing)) {
+    return SERD_SUCCESS;
+  }
+
+  // If the class is xsd:anyURI, check that the instance node is a URI
+  if (serd_node_equals(klass, ctx->uris.xsd_anyURI) ||
+      is_subdatatype(ctx, klass, ctx->uris.xsd_anyURI)) {
+    return report_check(ctx,
+                        statement,
+                        SERD_CHECK_ANY_URI,
+                        serd_node_type(instance) == SERD_URI,
+                        "Node " QUOTE_FMT " isn't a URI",
+                        QUOTE_ARGS(format_node(ctx, instance)));
+  }
+
+  // Check that instance is not somehow also a rdfs:Literal or rdfs:Datatype
+  if (report_check(ctx,
+                   statement,
+                   SERD_CHECK_INSTANCE_LITERAL,
+                   (!is_subclass(ctx, klass, ctx->uris.rdfs_Literal) &&
+                    !is_a(ctx, klass, ctx->uris.rdfs_Datatype)),
+                   "Instance " QUOTE_FMT " isn't a literal",
+                   QUOTE_ARGS(format_node(ctx, instance)))) {
+    return SERD_BAD_DATA;
+  }
+
+  // Check the instance against the class restriction (if applicable)
+  if ((st = check_instance_restriction(
+         ctx, root_klass, statement, instance, klass))) {
+    return st;
+  }
+
+  // Check the instance against each type in the union class (if applicable)
+  if ((st = check_instance_union_type(
+         ctx, check, root_klass, statement, instance, klass))) {
+    return st;
+  }
+
+  // Check the instance against any superclasses of the class
+  if ((st =
+         check_instance_super_types(ctx, check, statement, instance, klass))) {
+    return st;
+  }
+
+  // No contradictions, so succeed if the instance explicitly has this type
+  if (is_a(ctx, instance, klass)) {
+    return SERD_SUCCESS;
+  }
+
+  /* Test the flag here because report_check() returns failure for a disabled
+     check while reporting is suppressed.  The mask is built with 1ull to
+     match the 64-bit SerdValidatorChecks type, as report_check() does. */
+  if ((ctx->checks & (1ull << SERD_CHECK_EXPLICIT_INSTANCE_TYPE)) &&
+      serd_node_type(instance) != SERD_BLANK) {
+    return report_check(ctx,
+                        statement,
+                        SERD_CHECK_EXPLICIT_INSTANCE_TYPE,
+                        false,
+                        "Instance " QUOTE_FMT
+                        " does not explicitly have type " QUOTE_FMT,
+                        QUOTE_ARGS(format_node(ctx, instance)),
+                        QUOTE_ARGS(format_node(ctx, klass)));
+  }
+
+  return SERD_SUCCESS;
+}
+
+/**
+   Check that `node` is a valid value of `type`.
+
+   Everything is an rdfs:Resource.  A literal is checked as a literal: it is
+   always an rdfs:Literal, must have no datatype to be an rdf:PlainLiteral,
+   and otherwise `type` must be an rdfs:Datatype; the literal's value is then
+   validated with literal_is_valid().  A URI node is accepted as an
+   xsd:anyURI.  Every other case is handled by check_instance_type().
+
+   @param check Check under which instance type failures are reported.
+   @param statement Statement being checked, for error location.
+   @param node Node whose type is being checked.
+   @param type Required type.
+   @return SERD_SUCCESS if the node is valid, else a failure status.
+*/
+static SerdStatus
+check_type(SerdValidator* const       ctx,
+           const SerdValidatorCheck   check,
+           const SerdStatement* const statement,
+           const SerdNode* const      node,
+           const SerdNode* const      type)
+{
+  const NodeQuote type_quote = format_node(ctx, type);
+
+  // Everything is an rdfs:Resource
+  if (serd_node_equals(type, ctx->uris.rdfs_Resource)) {
+    return SERD_SUCCESS;
+  }
+
+  if (serd_node_type(node) == SERD_LITERAL) {
+    // Every literal is an rdfs:Literal
+    if (serd_node_equals(type, ctx->uris.rdfs_Literal)) {
+      return SERD_SUCCESS;
+    }
+
+    SerdStatus st = SERD_SUCCESS;
+    if (serd_node_equals(type, ctx->uris.rdf_PlainLiteral)) {
+      // A plain literal can not have a datatype
+      st = report_check(ctx,
+                        statement,
+                        SERD_CHECK_PLAIN_LITERAL_DATATYPE,
+                        !serd_node_datatype(node),
+                        "Typed literal \"%s\" isn't a plain literal",
+                        serd_node_string(node));
+    } else {
+      st = report_check(ctx,
+                        statement,
+                        SERD_CHECK_LITERAL_INSTANCE,
+                        is_a(ctx, type, ctx->uris.rdfs_Datatype),
+                        "Literal \"%s\" isn't an instance of " QUOTE_FMT,
+                        serd_node_string(node),
+                        QUOTE_ARGS(type_quote));
+    }
+
+    if (st) {
+      return SERD_BAD_DATA;
+    }
+
+    if (literal_is_valid(ctx, statement, node, type)) {
+      return SERD_SUCCESS;
+    }
+
+    return SERD_BAD_DATA;
+  }
+
+  if (serd_node_type(node) == SERD_URI &&
+      serd_node_equals(type, ctx->uris.xsd_anyURI)) {
+    return SERD_SUCCESS;
+  }
+
+  return check_instance_type(ctx, check, type, statement, node, type);
+}
+
+/**
+   Count the remaining statements in `i` whose `field` node is not blank.
+
+   Statements with no node in `field` are not counted.  The cursor is advanced
+   to wherever iteration stops and is not freed.
+*/
+static Count
+count_non_blanks(SerdCursor* const i, const SerdField field)
+{
+  Count total = 0u;
+
+  SERD_FOREACH (statement, i) {
+    const SerdNode* const candidate = serd_statement_node(statement, field);
+
+    total += (candidate && serd_node_type(candidate) != SERD_BLANK) ? 1u : 0u;
+  }
+
+  return total;
+}
+
+/**
+   Check `instance` against the cardinality bounds of `restriction`.
+
+   Counts the values that `instance` has for the restriction's owl:onProperty
+   and compares the count with owl:cardinality, owl:minCardinality and
+   owl:maxCardinality, in that order, for each one that is present.  Checking
+   stops at the first bound that is violated, after a note pointing at the
+   bound's statement has been logged.
+
+   Bounds are read with strtoul() in base 10 without validation.
+
+   @param root_klass Class the overall check started from, used in messages.
+   @param restriction Restriction node that may carry cardinality bounds.
+   @param statement Statement being checked, for error location.
+   @param instance Node whose property values are counted.
+   @return SERD_SUCCESS if there are no bounds or all are met, else the
+   failure status from report_check().
+*/
+static SerdStatus
+check_cardinality_restriction(SerdValidator* const       ctx,
+                              const SerdNode* const      root_klass,
+                              const SerdNode* const      restriction,
+                              const SerdStatement* const statement,
+                              const SerdNode* const      instance)
+{
+  const SerdModel* const model = ctx->model;
+  const URIs* const      uris  = &ctx->uris;
+
+  const SerdNode* const prop =
+    serd_model_get(model, restriction, uris->owl_onProperty, NULL, NULL);
+
+  const SerdStatement* const equal_statement = serd_model_get_statement(
+    model, restriction, uris->owl_cardinality, NULL, NULL);
+
+  const SerdStatement* const min_statement = serd_model_get_statement(
+    model, restriction, uris->owl_minCardinality, NULL, NULL);
+
+  const SerdStatement* const max_statement = serd_model_get_statement(
+    model, restriction, uris->owl_maxCardinality, NULL, NULL);
+
+  if (!(equal_statement || min_statement || max_statement)) {
+    return SERD_SUCCESS;
+  }
+
+  const NodeQuote prop_quote  = format_node(ctx, prop);
+  const NodeQuote klass_quote = format_node(ctx, root_klass);
+
+  const Count n_values =
+    (Count)serd_model_count(model, instance, prop, NULL, NULL);
+
+  SerdStatus st = SERD_SUCCESS;
+
+  // Check owl:cardinality
+  if (equal_statement) {
+    const char* const text =
+      serd_node_string(serd_statement_object(equal_statement));
+    const Count expected = strtoul(text, NULL, 10);
+
+    st = report_check(ctx,
+                      statement,
+                      SERD_CHECK_CARDINALITY_EQUAL,
+                      n_values == expected,
+                      "Instance " QUOTE_FMT " has %lu " QUOTE_FMT
+                      " properties",
+                      QUOTE_ARGS(format_node(ctx, instance)),
+                      n_values,
+                      QUOTE_ARGS(prop_quote));
+    if (st) {
+      log_note(ctx,
+               equal_statement,
+               SERD_CHECK_CARDINALITY_EQUAL,
+               "A " QUOTE_FMT " must have exactly %lu",
+               QUOTE_ARGS(klass_quote),
+               expected);
+      return st;
+    }
+  }
+
+  // Check owl:minCardinality
+  if (min_statement) {
+    const char* const text =
+      serd_node_string(serd_statement_object(min_statement));
+    const Count n_min = strtoul(text, NULL, 10);
+
+    st = report_check(ctx,
+                      statement,
+                      SERD_CHECK_CARDINALITY_MIN,
+                      n_values >= n_min,
+                      "Instance " QUOTE_FMT " has %lu " QUOTE_FMT
+                      " properties",
+                      QUOTE_ARGS(format_node(ctx, instance)),
+                      n_values,
+                      QUOTE_ARGS(prop_quote));
+    if (st) {
+      log_note(ctx,
+               min_statement,
+               SERD_CHECK_CARDINALITY_MIN,
+               "A " QUOTE_FMT " must have at least %lu",
+               QUOTE_ARGS(klass_quote),
+               n_min);
+      return st;
+    }
+  }
+
+  // Check owl:maxCardinality
+  if (max_statement) {
+    const char* const text =
+      serd_node_string(serd_statement_object(max_statement));
+    const Count n_max = strtoul(text, NULL, 10);
+
+    st = report_check(ctx,
+                      statement,
+                      SERD_CHECK_CARDINALITY_MAX,
+                      n_values <= n_max,
+                      "Instance " QUOTE_FMT " has %lu " QUOTE_FMT
+                      " properties",
+                      QUOTE_ARGS(format_node(ctx, instance)),
+                      n_values,
+                      QUOTE_ARGS(prop_quote));
+    if (st) {
+      log_note(ctx,
+               max_statement,
+               SERD_CHECK_CARDINALITY_MAX,
+               "A " QUOTE_FMT " must have at most %lu",
+               QUOTE_ARGS(klass_quote),
+               n_max);
+      return st;
+    }
+  }
+
+  return st;
+}
+
+/**
+   Check `instance` against the value constraints of `restriction`.
+
+   For owl:allValuesFrom, every value that `instance` has for the restriction's
+   owl:onProperty must be valid for the given type; each offending value is
+   reported, followed by a note pointing at the restriction.
+
+   For owl:someValuesFrom, at least one value must be valid for the given
+   type.  Candidates are tried with reporting suppressed, since a value that
+   does not match is not an error in itself.
+
+   Each constraint iterates over its own cursor: a cursor that has been
+   walked to its end by the first constraint would make the second one see no
+   values at all.  The suppression flag is restored to its previous value
+   rather than cleared, so that running inside another suppressed trial does
+   not switch reporting back on for the enclosing one.
+
+   @param root_klass Class the overall check started from, used in messages.
+   @param restriction Restriction node that may carry value constraints.
+   @param statement Statement being checked, for error location.
+   @param instance Node whose property values are checked.
+   @return The numerically greatest status of the checks that were made.
+*/
+static SerdStatus
+check_property_value_restriction(SerdValidator* const       ctx,
+                                 const SerdNode* const      root_klass,
+                                 const SerdNode* const      restriction,
+                                 const SerdStatement* const statement,
+                                 const SerdNode* const      instance)
+{
+  SerdStatus st = SERD_SUCCESS;
+
+  const SerdNode* const prop = serd_model_get(
+    ctx->model, restriction, ctx->uris.owl_onProperty, NULL, NULL);
+
+  const SerdStatement* const all_statement = serd_model_get_statement(
+    ctx->model, restriction, ctx->uris.owl_allValuesFrom, NULL, NULL);
+
+  const SerdStatement* const some_statement = serd_model_get_statement(
+    ctx->model, restriction, ctx->uris.owl_someValuesFrom, NULL, NULL);
+
+  if (!all_statement && !some_statement) {
+    return SERD_SUCCESS;
+  }
+
+  const NodeQuote prop_quote  = format_node(ctx, prop);
+  const NodeQuote klass_quote = format_node(ctx, root_klass);
+
+  if (all_statement) {
+    const SerdNode* const type       = serd_statement_object(all_statement);
+    const NodeQuote       type_quote = format_node(ctx, type);
+
+    SerdCursor* const values =
+      serd_model_find(ctx->model, instance, prop, NULL, NULL);
+
+    SERD_FOREACH (v, values) {
+      const SerdNode* const value  = serd_statement_object(v);
+      const SerdStatus      all_st = report_check(
+        ctx,
+        v,
+        SERD_CHECK_ALL_VALUES_FROM,
+        !check_type(ctx, SERD_CHECK_ALL_VALUES_FROM, v, value, type),
+        "Value isn't a " QUOTE_FMT,
+        QUOTE_ARGS(type_quote));
+
+      if (all_st) {
+        st = merge_status(st, all_st);
+        log_note(ctx,
+                 all_statement,
+                 SERD_CHECK_ALL_VALUES_FROM,
+                 "Required for any " QUOTE_FMT " of a " QUOTE_FMT,
+                 QUOTE_ARGS(prop_quote),
+                 QUOTE_ARGS(klass_quote));
+      }
+    }
+
+    serd_cursor_free(values);
+  }
+
+  if (some_statement) {
+    const SerdNode* const type       = serd_statement_object(some_statement);
+    const NodeQuote       type_quote = format_node(ctx, type);
+
+    // Search for some value with the required type, quietly
+    bool              found          = false;
+    const bool        was_suppressed = ctx->suppressed;
+    SerdCursor* const values =
+      serd_model_find(ctx->model, instance, prop, NULL, NULL);
+
+    ctx->suppressed = true;
+    SERD_FOREACH_NODE (SERD_OBJECT, value, values) {
+      if (!check_type(
+            ctx, SERD_CHECK_SOME_VALUES_FROM, statement, value, type)) {
+        found = true;
+        break;
+      }
+    }
+    ctx->suppressed = was_suppressed;
+
+    serd_cursor_free(values);
+
+    const SerdStatus some_st =
+      report_check(ctx,
+                   statement,
+                   SERD_CHECK_SOME_VALUES_FROM,
+                   found,
+                   QUOTE_FMT " has no " QUOTE_FMT " that is a " QUOTE_FMT,
+                   QUOTE_ARGS(format_node(ctx, instance)),
+                   QUOTE_ARGS(prop_quote),
+                   QUOTE_ARGS(type_quote));
+
+    // log_note() does not test the flag itself, so only explain enabled checks
+    if (some_st && (ctx->checks & (1ull << SERD_CHECK_SOME_VALUES_FROM))) {
+      log_note(ctx,
+               some_statement,
+               SERD_CHECK_SOME_VALUES_FROM,
+               "An instance of " QUOTE_FMT " must have at least 1",
+               QUOTE_ARGS(klass_quote));
+    }
+
+    st = merge_status(st, some_st);
+  }
+
+  return st;
+}
+
+/**
+   Check `instance` against `restriction` as an owl:Restriction.
+
+   Runs the cardinality checks and then the value checks.  Both always run,
+   and the numerically greater of their statuses is returned.  A node that
+   carries no restriction properties passes both.
+*/
+static SerdStatus
+check_instance_restriction(SerdValidator* const       ctx,
+                           const SerdNode* const      root_klass,
+                           const SerdStatement* const statement,
+                           const SerdNode* const      instance,
+                           const SerdNode* const      restriction)
+{
+  const SerdStatus cardinality_st =
+    merge_status(SERD_SUCCESS,
+                 check_cardinality_restriction(
+                   ctx, root_klass, restriction, statement, instance));
+
+  return merge_status(cardinality_st,
+                      check_property_value_restriction(
+                        ctx, root_klass, restriction, statement, instance));
+}
+
+/* Top-Level Checks */
+
+/**
+   Check that every rdfs:Class in the target graph has an rdfs:label.
+
+   Only nodes explicitly typed as rdfs:Class in the graph are considered, and
+   the label must be in the same graph.  Every class is checked, even after a
+   failure.
+
+   @return The numerically greatest status of all the individual checks.
+*/
+static SerdStatus
+check_class_label(SerdValidator* const ctx)
+{
+  SerdStatus st = SERD_SUCCESS;
+
+  // For each rdfs:Class
+  SerdCursor* const klasses = serd_model_find(ctx->model,
+                                              NULL,
+                                              ctx->uris.rdf_type,
+                                              ctx->uris.rdfs_Class,
+                                              ctx->graph);
+
+  SERD_FOREACH (k, klasses) {
+    const SerdNode* const klass = serd_statement_subject(k);
+
+    // Check that it has an rdfs:label in the same graph
+    const bool has_label =
+      serd_model_ask(ctx->model, klass, ctx->uris.rdfs_label, 0, ctx->graph);
+
+    const SerdStatus label_st = report_check(ctx,
+                                             k,
+                                             SERD_CHECK_CLASS_LABEL,
+                                             has_label,
+                                             "Class <%s> has no label",
+                                             serd_node_string(klass));
+
+    st = merge_status(st, label_st);
+  }
+
+  serd_cursor_free(klasses);
+  return st;
+}
+
+/**
+   Check that the values of datatype properties are literals.
+
+   For every property typed as owl:DatatypeProperty (in any graph), each
+   statement of that property in the target graph is checked, and a
+   SERD_CHECK_DATATYPE_PROPERTY failure is reported if its object isn't a
+   literal.  A note pointing at the property definition follows each failure.
+
+   @return The statuses of all failed reports, combined with merge_status().
+*/
+static SerdStatus
+check_datatype_property(SerdValidator* const ctx)
+{
+  const SerdModel* const model = ctx->model;
+  const URIs* const      uris  = &ctx->uris;
+  SerdStatus             st    = SERD_SUCCESS;
+
+  // For each owl:DatatypeProperty
+  SerdCursor* const properties = serd_model_find(
+    model, NULL, uris->rdf_type, uris->owl_DatatypeProperty, NULL);
+  SERD_FOREACH (p, properties) {
+    const SerdNode* const prop       = serd_statement_subject(p);
+    const NodeQuote       prop_quote = format_node(ctx, prop);
+
+    // For each statement of this property in the target graph
+    SerdCursor* const statements =
+      serd_model_find(model, NULL, prop, NULL, ctx->graph);
+    SERD_FOREACH (s, statements) {
+      const SerdNode* const object = serd_statement_object(s);
+
+      // Check that the object is a literal
+      const SerdStatus literal_st =
+        report_check(ctx,
+                     s,
+                     SERD_CHECK_DATATYPE_PROPERTY,
+                     serd_node_type(object) == SERD_LITERAL,
+                     QUOTE_FMT " isn't a literal",
+                     QUOTE_ARGS(format_node(ctx, object)));
+
+      if (literal_st) {
+        /* Accumulate rather than assign, so that a failure isn't forgotten
+           when a later statement passes */
+        st = merge_status(st, literal_st);
+
+        log_note(ctx,
+                 p,
+                 SERD_CHECK_DATATYPE_PROPERTY,
+                 "A " QUOTE_FMT " must be a literal",
+                 QUOTE_ARGS(prop_quote));
+      }
+    }
+    serd_cursor_free(statements);
+  }
+  serd_cursor_free(properties);
+
+  return st;
+}
+
+/**
+   Check for uses of deprecated properties and classes.
+
+   For every subject marked "owl:deprecated true" (in any graph): if it is an
+   rdf:Property, every statement using it as a predicate in the target graph
+   is reported under SERD_CHECK_DEPRECATED_PROPERTY; otherwise, if it is an
+   rdfs:Class, every explicit rdf:type statement naming it in the target graph
+   is reported under SERD_CHECK_DEPRECATED_CLASS.
+
+   This function is run when either of the two checks is enabled, so the note
+   that follows a report is only logged if the report returned an error and the
+   corresponding check is enabled.
+
+   @return The statuses of all reports, combined with merge_status().
+*/
+static SerdStatus
+check_deprecated(SerdValidator* const ctx)
+{
+  const SerdModel* const model = ctx->model;
+  SerdStatus             st    = SERD_SUCCESS;
+
+  const bool property_enabled =
+    (ctx->checks & (1ull << SERD_CHECK_DEPRECATED_PROPERTY)) != 0;
+
+  const bool class_enabled =
+    (ctx->checks & (1ull << SERD_CHECK_DEPRECATED_CLASS)) != 0;
+
+  // For each deprecated thing
+  SerdCursor* const things = serd_model_find(
+    model, NULL, ctx->uris.owl_deprecated, ctx->true_node, NULL);
+  SERD_FOREACH (t, things) {
+    const SerdNode* const thing       = serd_statement_subject(t);
+    const NodeQuote       thing_quote = format_node(ctx, thing);
+
+    if (is_a(ctx, thing, ctx->uris.rdf_Property)) {
+      // For each statement of this property in the target graph
+      SerdCursor* const statements =
+        serd_model_find(model, NULL, thing, NULL, ctx->graph);
+      SERD_FOREACH (s, statements) {
+        const SerdStatus use_st = report_check(ctx,
+                                               s,
+                                               SERD_CHECK_DEPRECATED_PROPERTY,
+                                               false,
+                                               "Use of deprecated property");
+
+        // Accumulate, so earlier failures aren't overwritten by later ones
+        st = merge_status(st, use_st);
+
+        if (use_st && property_enabled) {
+          log_note(ctx,
+                   t,
+                   SERD_CHECK_DEPRECATED_PROPERTY,
+                   "Property " QUOTE_FMT " is deprecated",
+                   QUOTE_ARGS(thing_quote));
+        }
+      }
+      serd_cursor_free(statements);
+
+    } else if (is_a(ctx, thing, ctx->uris.rdfs_Class)) {
+      // For each explicit instance of this class in the target graph
+      SerdCursor* const statements =
+        serd_model_find(model, NULL, ctx->uris.rdf_type, thing, ctx->graph);
+      SERD_FOREACH (s, statements) {
+        const SerdStatus use_st = report_check(ctx,
+                                               s,
+                                               SERD_CHECK_DEPRECATED_CLASS,
+                                               false,
+                                               "Instance of deprecated class");
+
+        st = merge_status(st, use_st);
+
+        if (use_st && class_enabled) {
+          log_note(ctx,
+                   t,
+                   SERD_CHECK_DEPRECATED_CLASS,
+                   "Class " QUOTE_FMT " is deprecated",
+                   QUOTE_ARGS(thing_quote));
+        }
+      }
+      serd_cursor_free(statements);
+    }
+  }
+  serd_cursor_free(things);
+
+  return st;
+}
+
+/**
+   Check that functional properties have at most one value per instance.
+
+   For every property typed as owl:FunctionalProperty (in any graph), the
+   values of that property are counted with count_non_blanks() for each
+   subject in the target graph, and a SERD_CHECK_FUNCTIONAL_PROPERTY failure is
+   reported if there is more than one.
+
+   @return SERD_BAD_DATA if any report returned an error, otherwise
+   SERD_SUCCESS.
+*/
+static SerdStatus
+check_functional_property(SerdValidator* const ctx)
+{
+  const URIs* const uris   = &ctx->uris;
+  SerdStatus        result = SERD_SUCCESS;
+
+  // For each owl:FunctionalProperty
+  SerdCursor* const properties = serd_model_find(
+    ctx->model, NULL, uris->rdf_type, uris->owl_FunctionalProperty, NULL);
+  SERD_FOREACH (p, properties) {
+    const SerdNode* const prop       = serd_statement_subject(p);
+    const NodeQuote       prop_quote = format_node(ctx, prop);
+
+    // Subject of the previously checked statement, to check each one once
+    const SerdNode* previous = NULL;
+
+    // For each instance with this property in the target graph
+    SerdCursor* const uses =
+      serd_model_find(ctx->model, NULL, prop, NULL, ctx->graph);
+    SERD_FOREACH (use, uses) {
+      const SerdNode* const subject = serd_statement_subject(use);
+
+      if (!serd_node_equals(subject, previous)) {
+        // Count the number of values on this instance
+        SerdCursor* const values =
+          serd_model_find(ctx->model, subject, prop, NULL, ctx->graph);
+        const Count n = count_non_blanks(values, SERD_OBJECT);
+        serd_cursor_free(values);
+
+        const SerdStatus count_st =
+          report_check(ctx,
+                       use,
+                       SERD_CHECK_FUNCTIONAL_PROPERTY,
+                       n <= 1,
+                       "Instance has %lu " QUOTE_FMT " properties",
+                       n,
+                       QUOTE_ARGS(prop_quote));
+
+        if (count_st) {
+          result = SERD_BAD_DATA;
+          log_note(ctx,
+                   p,
+                   SERD_CHECK_FUNCTIONAL_PROPERTY,
+                   "An instance may have at most 1");
+        }
+
+        previous = subject;
+      }
+    }
+    serd_cursor_free(uses);
+  }
+  serd_cursor_free(properties);
+
+  return result;
+}
+
+/**
+   Check every explicitly typed instance in the target graph.
+
+   Calls check_instance_type() for each rdf:type statement in the target
+   graph, using the stated type as both the root class and the type to check.
+   Checking stops at the first instance that fails, after logging a note that
+   points at its rdf:type statement.
+
+   FIXME: name
+
+   @return The status of the first failed check, or SERD_SUCCESS.
+*/
+static SerdStatus
+check_instance(SerdValidator* const ctx)
+{
+  SerdStatus st = SERD_SUCCESS;
+
+  // For each rdf:type property in the target graph
+  SerdCursor* const types = serd_model_find(
+    ctx->model, NULL, ctx->uris.rdf_type, NULL, ctx->graph);
+  SERD_FOREACH (type_statement, types) {
+    const SerdNode* const instance   = serd_statement_subject(type_statement);
+    const SerdNode* const type       = serd_statement_object(type_statement);
+    const NodeQuote       type_quote = format_node(ctx, type);
+
+    st = check_instance_type(
+      ctx, SERD_CHECK_INSTANCE_TYPE, type, type_statement, instance, type);
+
+    if (st != SERD_SUCCESS) {
+      log_note(ctx,
+               type_statement,
+               SERD_CHECK_INSTANCE_TYPE,
+               "Instance is explicitly a " QUOTE_FMT,
+               QUOTE_ARGS(type_quote));
+      break;
+    }
+  }
+  serd_cursor_free(types);
+
+  return st;
+}
+
+/**
+   Check that inverse functional property values belong to a single subject.
+
+   For every property typed as owl:InverseFunctionalProperty (in any graph),
+   the subjects that share each value of that property in the target graph are
+   counted with count_non_blanks().  If there is more than one, a
+   SERD_CHECK_INVERSE_FUNCTIONAL_PROPERTY failure is reported for every
+   statement with that value, followed by a single note pointing at the
+   property definition.
+
+   @return The statuses of all reports, combined with merge_status().
+*/
+static SerdStatus
+check_inverse_functional_property(SerdValidator* const ctx)
+{
+  const SerdModel* const model = ctx->model;
+  const URIs* const      uris  = &ctx->uris;
+  SerdStatus             st    = SERD_SUCCESS;
+
+  // For each owl:InverseFunctionalProperty
+  SerdCursor* const properties = serd_model_find(
+    model, NULL, uris->rdf_type, uris->owl_InverseFunctionalProperty, NULL);
+  SERD_FOREACH (p, properties) {
+    const SerdNode* const prop       = serd_statement_subject(p);
+    const NodeQuote       prop_quote = format_node(ctx, prop);
+
+    const SerdNode* last_obj = NULL;
+
+    // For each value of this property in the target graph
+    SerdCursor* const statements =
+      serd_model_find(model, NULL, prop, NULL, ctx->graph);
+    SERD_FOREACH (statement, statements) {
+      const SerdNode* const obj       = serd_statement_object(statement);
+      const NodeQuote       obj_quote = format_node(ctx, obj);
+      if (serd_node_equals(obj, last_obj)) {
+        continue;
+      }
+
+      // Count the number of subjects with this value in the target graph
+      SerdCursor* s = serd_model_find(ctx->model, NULL, prop, obj, ctx->graph);
+      const Count n = count_non_blanks(s, SERD_SUBJECT);
+
+      if (n > 1) {
+        // Get the range again so we can print a note for every value
+        serd_cursor_free(s);
+        s = serd_model_find(ctx->model, NULL, prop, obj, ctx->graph);
+
+        SERD_FOREACH (value_statement, s) {
+          const SerdNode* const subj = serd_statement_subject(value_statement);
+          const NodeQuote       subj_quote = format_node(ctx, subj);
+
+          // Record the failure so that it is reflected in the return status
+          st = merge_status(st,
+                            report_check(ctx,
+                                         value_statement,
+                                         SERD_CHECK_INVERSE_FUNCTIONAL_PROPERTY,
+                                         false,
+                                         "Instance " QUOTE_FMT
+                                         " shares the " QUOTE_FMT
+                                         " " QUOTE_FMT,
+                                         QUOTE_ARGS(subj_quote),
+                                         QUOTE_ARGS(prop_quote),
+                                         QUOTE_ARGS(obj_quote)));
+        }
+
+        log_note(ctx,
+                 p,
+                 SERD_CHECK_INVERSE_FUNCTIONAL_PROPERTY,
+                 "At most 1 instance may have a given " QUOTE_FMT,
+                 QUOTE_ARGS(prop_quote));
+      }
+
+      serd_cursor_free(s);
+      last_obj = obj;
+    }
+    serd_cursor_free(statements);
+  }
+  serd_cursor_free(properties);
+
+  return st;
+}
+
+/**
+   Check that the values of object properties are not literals.
+
+   For every property typed as owl:ObjectProperty (in any graph), each
+   statement of that property in the target graph is checked, and a
+   SERD_CHECK_OBJECT_PROPERTY failure is reported if its object is a literal.
+
+   @return SERD_BAD_DATA if any report returned an error, otherwise
+   SERD_SUCCESS.
+*/
+static SerdStatus
+check_object_property(SerdValidator* const ctx)
+{
+  SerdStatus result = SERD_SUCCESS;
+
+  // For each owl:ObjectProperty
+  SerdCursor* const properties = serd_model_find(
+    ctx->model, NULL, ctx->uris.rdf_type, ctx->uris.owl_ObjectProperty, NULL);
+  SERD_FOREACH_NODE (SERD_SUBJECT, prop, properties) {
+    const NodeQuote prop_quote = format_node(ctx, prop);
+
+    // For each statement of this property in the target graph
+    SerdCursor* const uses =
+      serd_model_find(ctx->model, NULL, prop, NULL, ctx->graph);
+    SERD_FOREACH (use, uses) {
+      const SerdNode* const value = serd_statement_object(use);
+
+      const SerdStatus use_st =
+        report_check(ctx,
+                     use,
+                     SERD_CHECK_OBJECT_PROPERTY,
+                     serd_node_type(value) != SERD_LITERAL,
+                     "Object property has literal value");
+
+      if (use_st) {
+        result = SERD_BAD_DATA;
+
+        // The note points at the current property definition statement
+        log_note(ctx,
+                 serd_cursor_get(properties),
+                 SERD_CHECK_OBJECT_PROPERTY,
+                 "A " QUOTE_FMT " must be an instance",
+                 QUOTE_ARGS(prop_quote));
+      }
+    }
+    serd_cursor_free(uses);
+  }
+  serd_cursor_free(properties);
+
+  return result;
+}
+
+/**
+   Check that the subjects of properties are in the property's domain.
+
+   For every rdfs:domain statement (in any graph), each statement of that
+   property in the target graph is checked with check_instance_type() under
+   SERD_CHECK_PROPERTY_DOMAIN.  A note pointing at the domain statement follows
+   each failure.
+
+   @return The statuses of all failed checks, combined with merge_status().
+*/
+static SerdStatus
+check_property_domain(SerdValidator* const ctx)
+{
+  const SerdModel* const model = ctx->model;
+  SerdStatus             st    = SERD_SUCCESS;
+
+  // For each property with an rdfs:domain
+  SerdCursor* const properties =
+    serd_model_find(model, NULL, ctx->uris.rdfs_domain, NULL, NULL);
+  SERD_FOREACH (p, properties) {
+    const SerdNode* const prop         = serd_statement_subject(p);
+    const NodeQuote       prop_quote   = format_node(ctx, prop);
+    const SerdNode* const domain       = serd_statement_object(p);
+    const NodeQuote       domain_quote = format_node(ctx, domain);
+
+    // For each statement of this property in the target graph
+    SerdCursor* const statements =
+      serd_model_find(model, NULL, prop, NULL, ctx->graph);
+    SERD_FOREACH (statement, statements) {
+      const SerdNode* const subj = serd_statement_subject(statement);
+
+      // Check that the subject is in the domain
+      const SerdStatus domain_st = check_instance_type(
+        ctx, SERD_CHECK_PROPERTY_DOMAIN, domain, statement, subj, domain);
+
+      if (domain_st) {
+        // Record the failure so that it is reflected in the return status
+        st = merge_status(st, domain_st);
+
+        log_note(ctx,
+                 p,
+                 SERD_CHECK_PROPERTY_DOMAIN,
+                 "An instance with a " QUOTE_FMT " must be a " QUOTE_FMT,
+                 QUOTE_ARGS(prop_quote),
+                 QUOTE_ARGS(domain_quote));
+      }
+    }
+    serd_cursor_free(statements);
+  }
+  serd_cursor_free(properties);
+
+  return st;
+}
+
+/**
+   Check that every property in the target graph has a label.
+
+   Reports a SERD_CHECK_PROPERTY_LABEL failure for each subject that is typed
+   as rdf:Property in the target graph but has no rdfs:label there.
+
+   @return The status accumulated from all reports with update_status().
+*/
+static SerdStatus
+check_property_label(SerdValidator* const ctx)
+{
+  SerdStatus result = SERD_SUCCESS;
+
+  // For each ?property a rdf:Property in the target graph
+  SerdCursor* const property_types = serd_model_find(
+    ctx->model, NULL, ctx->uris.rdf_type, ctx->uris.rdf_Property, ctx->graph);
+
+  SERD_FOREACH (type_statement, property_types) {
+    const SerdNode* const property = serd_statement_subject(type_statement);
+
+    // The label must be in the same graph as the property itself
+    const bool has_label = serd_model_ask(
+      ctx->model, property, ctx->uris.rdfs_label, NULL, ctx->graph);
+
+    const SerdStatus label_st = report_check(ctx,
+                                             type_statement,
+                                             SERD_CHECK_PROPERTY_LABEL,
+                                             has_label,
+                                             "Property <%s> has no label",
+                                             serd_node_string(property));
+
+    update_status(&result, label_st);
+  }
+
+  serd_cursor_free(property_types);
+  return result;
+}
+
+/**
+   Check that the objects of properties are in the property's range.
+
+   For every rdfs:range statement (in any graph), the object of each statement
+   of that property in the target graph is checked with check_type() under
+   SERD_CHECK_PROPERTY_RANGE.  A note pointing at the range statement follows
+   each failure.
+
+   @return The status of the first failed check, or SERD_SUCCESS.
+*/
+static SerdStatus
+check_property_range(SerdValidator* const ctx)
+{
+  SerdStatus result = SERD_SUCCESS;
+
+  // For each property with an rdfs:range
+  SerdCursor* const ranges =
+    serd_model_find(ctx->model, NULL, ctx->uris.rdfs_range, NULL, NULL);
+  SERD_FOREACH (range_statement, ranges) {
+    const SerdNode* const prop       = serd_statement_subject(range_statement);
+    const SerdNode* const klass      = serd_statement_object(range_statement);
+    const NodeQuote       prop_quote = format_node(ctx, prop);
+
+    // For each statement of this property in the target graph
+    SerdCursor* const uses =
+      serd_model_find(ctx->model, NULL, prop, NULL, ctx->graph);
+    SERD_FOREACH (use, uses) {
+      const SerdNode* const value = serd_statement_object(use);
+
+      // Check that the object is in the range
+      const SerdStatus range_st =
+        check_type(ctx, SERD_CHECK_PROPERTY_RANGE, use, value, klass);
+      if (!range_st) {
+        continue;
+      }
+
+      log_note(ctx,
+               range_statement,
+               SERD_CHECK_PROPERTY_RANGE,
+               "Required for any " QUOTE_FMT " value",
+               QUOTE_ARGS(prop_quote));
+
+      // Only the first failure status is kept
+      if (!result) {
+        result = range_st;
+      }
+    }
+    serd_cursor_free(uses);
+  }
+  serd_cursor_free(ranges);
+
+  return result;
+}
+
+/**
+   Check that every predicate used is defined, and defined as a property.
+
+   Iterates over the whole model in SERD_ORDER_POS order.  A predicate is
+   "defined" if it is the subject of any statement in any graph.  A defined
+   predicate must additionally have an rdf:type and be an rdf:Property
+   according to is_a().  Both failures are reported under
+   SERD_CHECK_PREDICATE_TYPE.
+
+   @return The statuses of all reports, combined with merge_status().
+*/
+static SerdStatus
+check_predicate_type(SerdValidator* const ctx)
+{
+  SerdStatus      result   = SERD_SUCCESS;
+  const SerdNode* previous = NULL;
+
+  // FIXME: graph
+  SerdCursor* const all = serd_model_begin_ordered(ctx->model, SERD_ORDER_POS);
+  SERD_FOREACH (statement, all) {
+    const SerdNode* const g    = serd_statement_graph(statement);
+    const SerdNode* const pred = serd_statement_predicate(statement);
+
+    // Skip if this is the same predicate as the last one checked
+    if (serd_node_equals(pred, previous)) {
+      continue;
+    }
+
+    // Skip statements in a named graph other than the target graph
+    if (ctx->graph && g && !serd_node_equals(ctx->graph, g)) {
+      continue;
+    }
+
+    const bool defined = serd_model_ask(ctx->model, pred, NULL, NULL, NULL);
+
+    const SerdStatus defined_st = report_check(ctx,
+                                               statement,
+                                               SERD_CHECK_PREDICATE_TYPE,
+                                               defined,
+                                               "Undefined property <%s>",
+                                               serd_node_string(pred));
+
+    result = merge_status(result, defined_st);
+
+    if (defined) {
+      // The type hierarchy is only consulted if there is some rdf:type
+      const bool is_property =
+        serd_model_ask(ctx->model, pred, ctx->uris.rdf_type, NULL, NULL) &&
+        is_a(ctx, pred, ctx->uris.rdf_Property);
+
+      const SerdStatus property_st = report_check(ctx,
+                                                  statement,
+                                                  SERD_CHECK_PREDICATE_TYPE,
+                                                  is_property,
+                                                  "<%s> isn't a property",
+                                                  serd_node_string(pred));
+
+      result = merge_status(result, property_st);
+    }
+
+    previous = pred;
+  }
+  serd_cursor_free(all);
+
+  return result;
+}
+
+/// A node on the path currently being followed by check_acyclic()
+typedef struct AcyclicPathLinkImpl {
+  const struct AcyclicPathLinkImpl* prev; ///< Link towards the start node
+  const SerdNode*                   node; ///< Node visited at this step
+} AcyclicPathLink;
+
+/// Return true iff `node` is already on `path`
+static bool
+acyclic_path_contains(const AcyclicPathLink* path, const SerdNode* const node)
+{
+  for (; path; path = path->prev) {
+    if (serd_node_equals(path->node, node)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/// Follow `property` links from the end of `path`, looking for `root`
+static SerdStatus
+check_acyclic_from(SerdValidator* const         ctx,
+                   const SerdValidatorCheck     check,
+                   const SerdNode* const        root,
+                   const AcyclicPathLink* const path,
+                   const SerdNode* const        property,
+                   const char* const            fmt)
+{
+  SerdStatus st = SERD_SUCCESS;
+
+  // FIXME: graph
+  SerdCursor* const links =
+    serd_model_find(ctx->model, path->node, property, NULL, NULL);
+  SERD_FOREACH (l, links) {
+    const SerdNode* const object       = serd_statement_object(l);
+    const NodeQuote       object_quote = format_node(ctx, object);
+
+    if ((st = report_check(ctx,
+                           l,
+                           check,
+                           !serd_node_equals(object, root),
+                           fmt,
+                           QUOTE_ARGS(object_quote)))) {
+      break;
+    }
+
+    /* Don't descend into a node that is already on this path.  Doing so can
+       never reach anything new, and would recurse forever if the path has
+       entered a cycle that doesn't lead back to the root. */
+    if (acyclic_path_contains(path, object)) {
+      continue;
+    }
+
+    const AcyclicPathLink next = {path, object};
+    if ((st = check_acyclic_from(ctx, check, root, &next, property, fmt))) {
+      log_note(ctx, l, check, "Via " QUOTE_FMT, QUOTE_ARGS(object_quote));
+      break;
+    }
+  }
+  serd_cursor_free(links);
+
+  return st;
+}
+
+/**
+   Check that following `property` from `node` never leads to `root`.
+
+   Links are followed depth-first in any graph.  If the object of a link is
+   `root`, a failure of `check` is reported with `fmt`, which is formatted with
+   the quoted object as its arguments.  When a failure is found deeper in the
+   recursion, a "Via" note is logged for each link on the way back up.
+
+   The search is bounded: a node is never entered again while it is on the
+   path being followed, so cycles that don't include `root` are skipped rather
+   than followed forever.
+
+   @return The status of the first failed report, or SERD_SUCCESS.
+*/
+static SerdStatus
+check_acyclic(SerdValidator* const     ctx,
+              const SerdValidatorCheck check,
+              const SerdNode* const    root,
+              const SerdNode* const    node,
+              const SerdNode* const    property,
+              const char* const        fmt)
+{
+  const AcyclicPathLink start = {NULL, node};
+
+  return check_acyclic_from(ctx, check, root, &start, property, fmt);
+}
+
+/**
+   Check that no class is (transitively) a sub-class of itself.
+
+   Runs check_acyclic() along rdfs:subClassOf, starting from the subject of
+   every rdfs:subClassOf statement in any graph.
+
+   @return The statuses of all checks, combined with merge_status().
+*/
+static SerdStatus
+check_subclass_cycle(SerdValidator* const ctx)
+{
+  const SerdNode* const link   = ctx->uris.rdfs_subClassOf;
+  SerdStatus            result = SERD_SUCCESS;
+
+  // For each subclass
+  SerdCursor* const subclasses =
+    serd_model_find(ctx->model, NULL, link, NULL, NULL);
+  SERD_FOREACH_NODE (SERD_SUBJECT, root, subclasses) {
+    const SerdStatus cycle_st =
+      check_acyclic(ctx,
+                    SERD_CHECK_CLASS_CYCLE,
+                    root,
+                    root,
+                    link,
+                    "Class " QUOTE_FMT " is a sub-class of itself");
+
+    result = merge_status(result, cycle_st);
+  }
+  serd_cursor_free(subclasses);
+
+  return result;
+}
+
+/**
+   Check that no datatype is (transitively) a refinement of itself.
+
+   Runs check_acyclic() along owl:onDatatype, starting from the subject of
+   every owl:onDatatype statement in any graph.
+
+   @return The statuses of all checks, combined with merge_status().
+*/
+static SerdStatus
+check_ondatatype_cycle(SerdValidator* const ctx)
+{
+  const SerdNode* const link   = ctx->uris.owl_onDatatype;
+  SerdStatus            result = SERD_SUCCESS;
+
+  // For each refined datatype
+  SerdCursor* const datatypes =
+    serd_model_find(ctx->model, NULL, link, NULL, NULL);
+  SERD_FOREACH_NODE (SERD_SUBJECT, root, datatypes) {
+    const SerdStatus cycle_st =
+      check_acyclic(ctx,
+                    SERD_CHECK_DATATYPE_CYCLE,
+                    root,
+                    root,
+                    link,
+                    "Class " QUOTE_FMT " is a sub-datatype of itself");
+
+    result = merge_status(result, cycle_st);
+  }
+  serd_cursor_free(datatypes);
+
+  return result;
+}
+
+/**
+   Check that no property is (transitively) a sub-property of itself.
+
+   Runs check_acyclic() along rdfs:subPropertyOf, starting from the subject of
+   every rdfs:subPropertyOf statement in any graph.
+
+   @return The statuses of all checks, combined with merge_status().
+*/
+static SerdStatus
+check_subproperty_cycle(SerdValidator* const ctx)
+{
+  const SerdNode* const link   = ctx->uris.rdfs_subPropertyOf;
+  SerdStatus            result = SERD_SUCCESS;
+
+  // For each subproperty relation
+  SerdCursor* const subproperties =
+    serd_model_find(ctx->model, NULL, link, NULL, NULL);
+  SERD_FOREACH_NODE (SERD_SUBJECT, root, subproperties) {
+    const SerdStatus cycle_st =
+      check_acyclic(ctx,
+                    SERD_CHECK_PROPERTY_CYCLE,
+                    root,
+                    root,
+                    link,
+                    "Property " QUOTE_FMT " is a sub-property of itself");
+
+    result = merge_status(result, cycle_st);
+  }
+  serd_cursor_free(subproperties);
+
+  return result;
+}
+
+/* Statement Checks */
+
+/**
+   Check that the object of a statement is valid, if it is a literal.
+
+   Statements with a non-literal object always pass.  Literal objects are
+   passed to literal_is_valid() along with their datatype.
+
+   @return SERD_BAD_DATA if the object is an invalid literal, otherwise
+   SERD_SUCCESS.
+*/
+static SerdStatus
+statement_check_valid_literal(SerdValidator* const       ctx,
+                              const SerdStatement* const statement)
+{
+  const SerdNode* const object     = serd_statement_object(statement);
+  const bool            is_literal = serd_node_type(object) == SERD_LITERAL;
+
+  // The datatype is only looked up for literal objects
+  if (is_literal &&
+      !literal_is_valid(ctx, statement, object, serd_node_datatype(object))) {
+    return SERD_BAD_DATA;
+  }
+
+  return SERD_SUCCESS;
+}
+
+/* Entry Points */
+
+/**
+   Create a new validator.
+
+   The validator is allocated with serd_wcalloc(), and its `true_node` and
+   vocabulary URI nodes are obtained from the node set of `world`.  No checks
+   are enabled by this function.
+
+   @param world The world, which must not be null.
+   @return A new validator, or null if allocation failed.
+*/
+SerdValidator*
+serd_validator_new(SerdWorld* const world)
+{
+  assert(world);
+
+  SerdValidator* const self =
+    (SerdValidator*)serd_wcalloc(world, 1, sizeof(SerdValidator));
+
+  if (self) {
+    SerdNodes* const nodes = serd_world_nodes(world);
+
+    self->world     = world;
+    self->true_node = serd_nodes_value(nodes, serd_bool(true));
+
+// Set the field uris.<prefix>_<suffix> to the node for that namespaced URI
+#define INTERN_URI(prefix, suffix)  \
+  self->uris.prefix##_##suffix =    \
+    serd_nodes_uri(nodes, SERD_STRING(NS_##prefix #suffix))
+
+    INTERN_URI(owl, Class);
+    INTERN_URI(owl, DatatypeProperty);
+    INTERN_URI(owl, DeprecatedClass);
+    INTERN_URI(owl, DeprecatedProperty);
+    INTERN_URI(owl, FunctionalProperty);
+    INTERN_URI(owl, InverseFunctionalProperty);
+    INTERN_URI(owl, ObjectProperty);
+    INTERN_URI(owl, Restriction);
+    INTERN_URI(owl, Thing);
+    INTERN_URI(owl, allValuesFrom);
+    INTERN_URI(owl, cardinality);
+    INTERN_URI(owl, deprecated);
+    INTERN_URI(owl, equivalentClass);
+    INTERN_URI(owl, maxCardinality);
+    INTERN_URI(owl, minCardinality);
+    INTERN_URI(owl, onDatatype);
+    INTERN_URI(owl, onProperty);
+    INTERN_URI(owl, someValuesFrom);
+    INTERN_URI(owl, unionOf);
+    INTERN_URI(owl, withRestrictions);
+    INTERN_URI(rdf, PlainLiteral);
+    INTERN_URI(rdf, Property);
+    INTERN_URI(rdf, XMLLiteral);
+    INTERN_URI(rdf, first);
+    INTERN_URI(rdf, rest);
+    INTERN_URI(rdf, type);
+    INTERN_URI(rdfs, Class);
+    INTERN_URI(rdfs, Datatype);
+    INTERN_URI(rdfs, Literal);
+    INTERN_URI(rdfs, Resource);
+    INTERN_URI(rdfs, domain);
+    INTERN_URI(rdfs, label);
+    INTERN_URI(rdfs, range);
+    INTERN_URI(rdfs, subClassOf);
+    INTERN_URI(rdfs, subPropertyOf);
+    INTERN_URI(xsd, anyURI);
+    INTERN_URI(xsd, maxExclusive);
+    INTERN_URI(xsd, maxInclusive);
+    INTERN_URI(xsd, minExclusive);
+    INTERN_URI(xsd, minInclusive);
+    INTERN_URI(xsd, pattern);
+
+#undef INTERN_URI
+  }
+
+  return self;
+}
+
+/**
+   Free a validator created with serd_validator_new().
+
+   @param validator The validator to free, which may be null, in which case
+   this function does nothing.
+*/
+void
+serd_validator_free(SerdValidator* const validator)
+{
+  // The world is read from the validator, so a null one can't be dereferenced
+  if (validator) {
+    serd_wfree(validator->world, validator);
+  }
+}
+
+/**
+   Enable a single validation check.
+
+   @param validator The validator, which must not be null.
+   @param check The check to enable.
+   @return SERD_SUCCESS, or SERD_BAD_ARG if `check` is not a known check.
+*/
+SerdStatus
+serd_validator_enable_check(SerdValidator* const     validator,
+                            const SerdValidatorCheck check)
+{
+  assert(validator);
+
+  // Reject values outside the set of checks, which would make the shift invalid
+  if ((uint64_t)check >= N_CHECKS) {
+    return SERD_BAD_ARG;
+  }
+
+  validator->checks |= (SerdValidatorChecks)(1ull << check);
+  return SERD_SUCCESS;
+}
+
+/**
+   Disable a single validation check.
+
+   @param validator The validator, which must not be null.
+   @param check The check to disable.
+   @return SERD_SUCCESS, or SERD_BAD_ARG if `check` is not a known check.
+*/
+SerdStatus
+serd_validator_disable_check(SerdValidator* const     validator,
+                             const SerdValidatorCheck check)
+{
+  assert(validator);
+
+  // Reject values outside the set of checks, which would make the shift invalid
+  if ((uint64_t)check >= N_CHECKS) {
+    return SERD_BAD_ARG;
+  }
+
+  validator->checks &= ~((SerdValidatorChecks)(1ull << check));
+  return SERD_SUCCESS;
+}
+
+/**
+   Call `function` for every check whose name matches `regex`.
+
+   Each entry of `check_names` is tested against the compiled pattern, and
+   `function` is called with the validator and the index of every entry that
+   matches.  The status returned by `function` is not inspected.
+
+   @return SERD_BAD_ARG if `regex` could not be parsed, SERD_FAILURE if no
+   check name matched, otherwise SERD_SUCCESS.
+*/
+static SerdStatus
+for_each_matching_check(SerdValidator* const validator,
+                        const char* const    regex,
+                        SerdStatus (*function)(SerdValidator*,
+                                               SerdValidatorCheck))
+{
+  RerexPattern* const pattern = parse_regex(validator->world, NULL, regex);
+  if (!pattern) {
+    return SERD_BAD_ARG;
+  }
+
+  RerexMatcher* const matcher = rerex_new_matcher(pattern);
+  SerdStatus          result  = SERD_FAILURE;
+
+  for (uint64_t index = 0; index < N_CHECKS; ++index) {
+    if (rerex_match(matcher, check_names[index])) {
+      function(validator, (SerdValidatorCheck)index);
+      result = SERD_SUCCESS; // At least one check name matched
+    }
+  }
+
+  rerex_free_matcher(matcher);
+  rerex_free_pattern(pattern);
+
+  return result;
+}
+
+/**
+   Enable all validation checks whose names match a pattern.
+
+   Two special values of `regex` are handled before any matching: "all"
+   replaces the enabled set with every check except
+   SERD_CHECK_EXPLICIT_INSTANCE_TYPE, and "everything" replaces it with every
+   check.  Any other value is treated as a regular expression that is matched
+   against the check names.
+
+   @param validator The validator, which must not be null.
+   @param regex The pattern, which must not be null.
+   @return SERD_SUCCESS, or the error from for_each_matching_check().
+*/
+SerdStatus
+serd_validator_enable_checks(SerdValidator* const validator,
+                             const char* const    regex)
+{
+  assert(validator);
+  assert(regex);
+
+  if (!strcmp(regex, "all")) {
+    validator->checks =
+      ~((SerdValidatorChecks)(1ull << SERD_CHECK_EXPLICIT_INSTANCE_TYPE));
+    return SERD_SUCCESS;
+  }
+
+  if (!strcmp(regex, "everything")) {
+    /* Invert a zero of the full check set type, since ~0u is only as wide as
+       unsigned int and would leave any higher bits clear */
+    validator->checks = ~((SerdValidatorChecks)0);
+    return SERD_SUCCESS;
+  }
+
+  return for_each_matching_check(validator, regex, serd_validator_enable_check);
+}
+
+/**
+   Disable all validation checks whose names match a pattern.
+
+   The special value "all" clears the enabled set entirely.  Any other value
+   is treated as a regular expression that is matched against the check names.
+
+   @param validator The validator, which must not be null.
+   @param regex The pattern, which must not be null.
+   @return SERD_SUCCESS, or the error from for_each_matching_check().
+*/
+SerdStatus
+serd_validator_disable_checks(SerdValidator* const validator,
+                              const char* const    regex)
+{
+  assert(validator);
+  assert(regex);
+
+  if (strcmp(regex, "all") != 0) {
+    return for_each_matching_check(
+      validator, regex, serd_validator_disable_check);
+  }
+
+  validator->checks = 0u;
+  return SERD_SUCCESS;
+}
+
+/**
+   Check every literal in the model, if any literal check is enabled.
+
+   If none of the datatype or literal checks is enabled, nothing is done.
+   Otherwise, every statement in the model is passed to
+   statement_check_valid_literal().
+
+   @return The status accumulated from all statements with update_status().
+*/
+static SerdStatus
+check_literals(SerdValidator* const ctx)
+{
+  /* The masks use 1ull like the rest of the file, since unsigned long may be
+     narrower than the check set */
+  const SerdValidatorChecks literal_checks =
+    ((1ull << SERD_CHECK_DATATYPE_TYPE) |         //
+     (1ull << SERD_CHECK_LITERAL_INSTANCE) |      //
+     (1ull << SERD_CHECK_LITERAL_MAX_EXCLUSIVE) | //
+     (1ull << SERD_CHECK_LITERAL_MAX_INCLUSIVE) | //
+     (1ull << SERD_CHECK_LITERAL_MIN_EXCLUSIVE) | //
+     (1ull << SERD_CHECK_LITERAL_MIN_INCLUSIVE) | //
+     (1ull << SERD_CHECK_LITERAL_PATTERN) |       //
+     (1ull << SERD_CHECK_LITERAL_RESTRICTION) |   //
+     (1ull << SERD_CHECK_LITERAL_VALUE));
+
+  SerdStatus st = SERD_SUCCESS;
+
+  if (ctx->checks & literal_checks) {
+    SerdCursor* const all =
+      serd_model_begin_ordered(ctx->model, SERD_ORDER_SPO);
+    SERD_FOREACH (statement, all) {
+      update_status(&st, statement_check_valid_literal(ctx, statement));
+    }
+    serd_cursor_free(all);
+  }
+
+  return st;
+}
+
+/**
+   Validate a model, running every enabled check.
+
+   The cycle checks run first.  If any of them fails, a notice is logged and
+   the failure status is returned without running the remaining checks.
+   Otherwise, each remaining check that is enabled is run, followed by the
+   literal checks, and a summary is logged.
+
+   The target graph is only stored in the validator for the duration of the
+   call, and is cleared again on every return path.
+
+   @param validator The validator, which must not be null.
+   @param model The model to validate, which must not be null.
+   @param graph The target graph, passed to the model queries of the checks.
+   @param env Stored in the validator for use by the checks.
+
+   @return The status of the cycle checks if one of them fails, SERD_BAD_DATA
+   if the validator's error count is non-zero, otherwise SERD_SUCCESS.
+*/
+SerdStatus
+serd_validate(SerdValidator* const   validator,
+              const SerdModel* const model,
+              const SerdNode* const  graph,
+              const SerdEnv* const   env)
+{
+  assert(validator);
+  assert(model);
+
+  SerdValidator* const ctx = validator;
+  SerdStatus           st  = SERD_SUCCESS;
+
+  ctx->env   = env;
+  ctx->model = model;
+  ctx->graph = graph;
+
+  /* Check class/datatype cycles first, so we can give up early if any are
+     found.  Dealing with non-trivial cycles everywhere makes some other checks
+     too complicated, and would likely just make the output more confusing
+     anyway. */
+
+  /* All masks below use 1ull like the rest of the file, since unsigned long
+     may be narrower than the check set */
+
+  if (ctx->checks & (1ull << SERD_CHECK_CLASS_CYCLE)) {
+    update_status(&st, check_subclass_cycle(ctx));
+  }
+
+  if (ctx->checks & (1ull << SERD_CHECK_DATATYPE_CYCLE)) {
+    update_status(&st, check_ondatatype_cycle(ctx));
+  }
+
+  if (ctx->checks & (1ull << SERD_CHECK_PROPERTY_CYCLE)) {
+    update_status(&st, check_subproperty_cycle(ctx));
+  }
+
+  if (st) {
+    serd_logf(
+      ctx->world,
+      SERD_LOG_LEVEL_NOTICE,
+      "Recursive type or property definition found, aborting further checks");
+
+    // Clear the target graph here too, as on the normal return path
+    ctx->graph = NULL;
+    return st;
+  }
+
+  /* No dangerous cycles that might hang the validator, so proceed with normal
+     checks. */
+
+  if (ctx->checks & (1ull << SERD_CHECK_PREDICATE_TYPE)) {
+    update_status(&st, check_predicate_type(ctx));
+  }
+
+  if (ctx->checks & (1ull << SERD_CHECK_CLASS_LABEL)) {
+    update_status(&st, check_class_label(ctx));
+  }
+
+  if (ctx->checks & (1ull << SERD_CHECK_DATATYPE_PROPERTY)) {
+    update_status(&st, check_datatype_property(ctx));
+  }
+
+  if (ctx->checks & ((1ull << SERD_CHECK_DEPRECATED_PROPERTY) |
+                     (1ull << SERD_CHECK_DEPRECATED_CLASS))) {
+    update_status(&st, check_deprecated(ctx));
+  }
+
+  if (ctx->checks & (1ull << SERD_CHECK_FUNCTIONAL_PROPERTY)) {
+    update_status(&st, check_functional_property(ctx));
+  }
+
+  if (ctx->checks & (1ull << SERD_CHECK_INSTANCE_TYPE)) {
+    update_status(&st, check_instance(ctx));
+  }
+
+  if (ctx->checks & (1ull << SERD_CHECK_INVERSE_FUNCTIONAL_PROPERTY)) {
+    update_status(&st, check_inverse_functional_property(ctx));
+  }
+
+  if (ctx->checks & (1ull << SERD_CHECK_OBJECT_PROPERTY)) {
+    update_status(&st, check_object_property(ctx));
+  }
+
+  if (ctx->checks & (1ull << SERD_CHECK_PROPERTY_DOMAIN)) {
+    update_status(&st, check_property_domain(ctx));
+  }
+
+  if (ctx->checks & (1ull << SERD_CHECK_PROPERTY_LABEL)) {
+    update_status(&st, check_property_label(ctx));
+  }
+
+  if (ctx->checks & (1ull << SERD_CHECK_PROPERTY_RANGE)) {
+    update_status(&st, check_property_range(ctx));
+  }
+
+  update_status(&st, check_literals(ctx));
+
+  ctx->graph = NULL;
+
+  // The result is decided by the error count, not the accumulated status
+  if (ctx->n_errors > 0) {
+    serd_logf(ctx->world,
+              SERD_LOG_LEVEL_ERROR,
+              "Failed %u of %u validation checks",
+              ctx->n_errors,
+              ctx->n_checks);
+
+    return SERD_BAD_DATA;
+  }
+
+  serd_logf(ctx->world,
+            SERD_LOG_LEVEL_INFO,
+            "Passed all %u validation checks",
+            ctx->n_checks);
+
+  return SERD_SUCCESS;
+}