diff options
author | David Robillard <d@drobilla.net> | 2018-05-27 15:48:25 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2022-01-28 21:57:29 -0500 |
commit | f93a441065a611cc32874dde67e53a8295c87baf (patch) | |
tree | 3793e2df1c365bf93fbe700c9428b54668f08c53 /src | |
parent | 2c5bee49b4494e172c4fa147af91bad199ed9362 (diff) | |
download | serd-f93a441065a611cc32874dde67e53a8295c87baf.tar.gz serd-f93a441065a611cc32874dde67e53a8295c87baf.tar.bz2 serd-f93a441065a611cc32874dde67e53a8295c87baf.zip |
[WIP] Add validation
Diffstat (limited to 'src')
-rw-r--r-- | src/log.c | 8 | ||||
-rw-r--r-- | src/string.c | 2 | ||||
-rw-r--r-- | src/validate.c | 2042 |
3 files changed, 2051 insertions, 1 deletions
@@ -158,8 +158,14 @@ serd_vxlogf(const SerdWorld* const world, // Format and print the message itself vfprintf(stderr, fmt, args); - fprintf(stderr, "\n"); + // Print clang-tidy-style check name (validation errors) + const char* const check = get_log_field(n_fields, fields, "SERD_CHECK"); + if (check) { + fprintf(stderr, " [%s]", check); + } + + fprintf(stderr, "\n"); return SERD_SUCCESS; } diff --git a/src/string.c b/src/string.c index 1b9bb64a..77cd0191 100644 --- a/src/string.c +++ b/src/string.c @@ -75,6 +75,8 @@ serd_strerror(const SerdStatus status) return "Invalid or unresolved URI"; case SERD_BAD_WRITE: return "Error writing to file"; + case SERD_BAD_DATA: + return "Invalid data"; } return "Unknown error"; diff --git a/src/validate.c b/src/validate.c new file mode 100644 index 00000000..b300c0af --- /dev/null +++ b/src/validate.c @@ -0,0 +1,2042 @@ +/* + Copyright 2012-2021 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "memory.h" + +#include "exess/exess.h" +#include "rerex/rerex.h" +#include "serd/serd.h" + +#include <assert.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define NS_owl "http://www.w3.org/2002/07/owl#" +#define NS_rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define NS_rdfs "http://www.w3.org/2000/01/rdf-schema#" +#define NS_xsd "http://www.w3.org/2001/XMLSchema#" + +#define SERD_FOREACH(name, range) \ + for (const SerdStatement*(name) = NULL; \ + !serd_cursor_is_end(range) && ((name) = serd_cursor_get(range)); \ + serd_cursor_advance(range)) + +#define SERD_FOREACH_NODE(field, name, range) \ + for (const SerdNode*(name) = NULL; \ + !serd_cursor_is_end(range) && \ + ((name) = serd_statement_node( \ + (const SerdStatement* SERD_NONNULL)serd_cursor_get(range), field)); \ + serd_cursor_advance(range)) + +#define QUOTE_FMT "%s%s%s" +#define QUOTE_ARGS(quote) quote.prefix.buf, quote.string.buf, quote.suffix.buf + +#define N_CHECKS 34 + +static const char* const check_names[N_CHECKS] = { + "nothing", // + "allValuesFrom", // + "anyUri", // + "cardinalityEqual", // + "cardinalityMax", // + "cardinalityMin", // + "classCycle", // + "classLabel", // + "datatypeCycle", // + "datatypeProperty", // + "datatypeType", // + "deprecatedClass", // + "deprecatedProperty", // + "explicitInstanceType", // + "functionalProperty", // + "instanceLiteral", // + "instanceType", // + "inverseFunctionalProperty", // + "literalInstance", // + "literalMaxExclusive", // + "literalMaxInclusive", // + "literalMinExclusive", // + "literalMinInclusive", // + "literalPattern", // + "literalRestriction", // + "literalValue", // + "objectProperty", // + "plainLiteralDatatype", // + "predicateType", // + "propertyCycle", // + "propertyDomain", // + "propertyLabel", // + "propertyRange", // + "someValuesFrom", // +}; + +/// Bitwise OR of SerdValidatorCheck values +typedef uint64_t SerdValidatorChecks; + +typedef unsigned long Count; + +typedef struct { + const SerdNode* owl_Class; + const SerdNode* owl_DatatypeProperty; + const SerdNode* owl_DeprecatedClass; + const SerdNode* owl_DeprecatedProperty; + const SerdNode* owl_FunctionalProperty; + const SerdNode* owl_InverseFunctionalProperty; + const SerdNode* owl_ObjectProperty; + const SerdNode* owl_Restriction; + const SerdNode* owl_Thing; + const SerdNode* owl_allValuesFrom; + const SerdNode* owl_cardinality; + const SerdNode* owl_deprecated; + const SerdNode* owl_equivalentClass; + const SerdNode* owl_maxCardinality; + const SerdNode* owl_minCardinality; + const SerdNode* owl_onDatatype; + const SerdNode* owl_onProperty; + const SerdNode* owl_someValuesFrom; + const SerdNode* owl_unionOf; + const SerdNode* owl_withRestrictions; + const SerdNode* rdf_PlainLiteral; + const SerdNode* rdf_Property; + const SerdNode* rdf_XMLLiteral; + const SerdNode* rdf_first; + const SerdNode* rdf_rest; + const SerdNode* rdf_type; + const SerdNode* rdfs_Class; + const SerdNode* rdfs_Datatype; + const SerdNode* rdfs_Literal; + const SerdNode* rdfs_Resource; + const SerdNode* rdfs_domain; + const SerdNode* rdfs_label; + const SerdNode* rdfs_range; + const SerdNode* rdfs_subClassOf; + const SerdNode* rdfs_subPropertyOf; + const SerdNode* xsd_anyURI; + const SerdNode* xsd_maxExclusive; + const SerdNode* xsd_maxInclusive; + const SerdNode* xsd_minExclusive; + const SerdNode* xsd_minInclusive; + const SerdNode* xsd_pattern; +} URIs; + +struct SerdValidatorImpl { + const SerdWorld* world; + const SerdEnv* env; + const SerdModel* model; + const SerdNode* graph; + const SerdNode* true_node; + URIs uris; + SerdValidatorChecks checks; + unsigned n_errors; + unsigned n_checks; + bool suppressed; +}; + +typedef struct { + const char* name; +} Check; + +static SerdStatus +check_instance_type(SerdValidator* ctx, + SerdValidatorCheck check, + const SerdNode* root_klass, + const SerdStatement* statement, + const SerdNode* instance, + const SerdNode* klass); + +static SerdStatus +check_instance_restriction(SerdValidator* ctx, + const SerdNode* root_klass, + const SerdStatement* statement, + const SerdNode* instance, + const SerdNode* restriction); + +typedef struct { + SerdStringView prefix; + SerdStringView string; + SerdStringView suffix; +} NodeQuote; + +static NodeQuote +format_node(const SerdValidator* const ctx, const SerdNode* const node) +{ + const SerdStringView empty = SERD_EMPTY_STRING(); + NodeQuote q = {empty, empty, empty}; + SerdStringView prefix = empty; + SerdStringView suffix = empty; + const SerdStringView node_view = serd_node_string_view(node); + + switch (serd_node_type(node)) { + case SERD_LITERAL: + q.prefix = SERD_STRING("\""); + q.string = node_view; + q.suffix = SERD_STRING("\""); + break; + case SERD_URI: + if (!serd_env_qualify(ctx->env, node_view, &prefix, &suffix)) { + q.prefix = prefix; + q.string = SERD_STRING(":"); + q.suffix = suffix; + } else { + q.prefix = SERD_STRING("<"); + q.string = node_view; + q.suffix = SERD_STRING(">"); + } + break; + case SERD_BLANK: + q.prefix = SERD_STRING("_:"); + q.string = node_view; + break; + case SERD_VARIABLE: + q.prefix = SERD_STRING("?"); + q.string = node_view; + break; + }; + + return q; +} + +SERD_LOG_FUNC(5, 0) +static void +vreportf(SerdValidator* const ctx, + const SerdValidatorCheck check, + const SerdLogLevel level, + const SerdStatement* const statement, + const char* const fmt, + va_list args) +{ + const char* file = NULL; + char line[24] = {0}; + char col[24] = {0}; + char status_string[12] = {0}; + + snprintf(status_string, sizeof(status_string), "%d", SERD_BAD_DATA); + + const SerdCaret* const caret = + statement ? serd_statement_caret(statement) : NULL; + + if (caret) { + file = serd_node_string(serd_caret_name(caret)); + + snprintf(line, sizeof(line), "%u", serd_caret_line(caret)); + snprintf(col, sizeof(col), "%u", serd_caret_column(caret)); + } + + const SerdLogField fields[] = {{"SERD_STATUS", status_string}, + {"SERD_CHECK", check_names[check]}, + {"SERD_FILE", file}, + {"SERD_LINE", line}, + {"SERD_COL", col}}; + + serd_vxlogf(ctx->world, level, caret ? 5u : 2u, fields, fmt, args); +} + +SERD_LOG_FUNC(5, 6) +static SerdStatus +report_check(SerdValidator* const ctx, + const SerdStatement* const statement, + const SerdValidatorCheck check, + const bool condition, + const char* const fmt, + ...) +{ + if (!ctx->suppressed) { + if (!(ctx->checks & (1ull << check))) { + return SERD_SUCCESS; + } + + ctx->n_checks += 1; + ctx->n_errors += condition ? 0 : 1; + + if (!condition) { + va_list args; + va_start(args, fmt); + vreportf(ctx, check, SERD_LOG_LEVEL_ERROR, statement, fmt, args); + va_end(args); + } + } + + return condition ? SERD_SUCCESS : SERD_BAD_DATA; +} + +SERD_LOG_FUNC(4, 5) +static void +log_note(SerdValidator* const ctx, + const SerdStatement* const statement, + const SerdValidatorCheck check, + const char* const fmt, + ...) +{ + if (!ctx->suppressed) { + va_list args; + va_start(args, fmt); + vreportf(ctx, check, SERD_LOG_LEVEL_NOTICE, statement, fmt, args); + va_end(args); + } +} + +/* + Return true iff `child` is a descendant of `parent` by `pred` arcs. + + That is, returns true if there is a path from `child` to `parent` by + following `pred` arcs starting from child. +*/ +static bool +is_descendant(SerdValidator* const ctx, + const SerdNode* const child, + const SerdNode* const parent, + const SerdNode* const pred) +{ + if (serd_node_equals(child, parent) || + serd_model_ask( + ctx->model, child, ctx->uris.owl_equivalentClass, parent, NULL)) { + return true; + } + + SerdCursor* const i = serd_model_find(ctx->model, child, pred, NULL, NULL); + SERD_FOREACH_NODE (SERD_OBJECT, o, i) { + if (!serd_node_equals(child, o) && is_descendant(ctx, o, parent, pred)) { + serd_cursor_free(i); + return true; + } + } + + serd_cursor_free(i); + return false; +} + +// Return true iff `klass` is a subclass of `super` +static bool +is_subclass(SerdValidator* const ctx, + const SerdNode* const klass, + const SerdNode* const super) +{ + return serd_node_equals(klass, super) || + is_descendant(ctx, klass, super, ctx->uris.rdfs_subClassOf); +} + +// Return true iff `datatype` is a subdatatype of `super` +static bool +is_subdatatype(SerdValidator* const ctx, + const SerdNode* const datatype, + const SerdNode* const super) +{ + return serd_node_equals(datatype, super) || + is_descendant(ctx, datatype, super, ctx->uris.owl_onDatatype); +} + +static void +update_status(SerdStatus* const old, const SerdStatus next) +{ + *old = next > *old ? next : *old; +} + +static SerdStatus +merge_status(const SerdStatus a, const SerdStatus b) +{ + return a > b ? a : b; +} + +static int +bound_cmp(SerdValidator* const ctx, + const SerdStatement* const literal_statement, + const SerdNode* const literal, + const SerdNode* const type, + const SerdStatement* const bound_statement, + const SerdNode* const bound) +{ + const ExessDatatype value_type = + exess_datatype_from_uri(serd_node_string(type)); + + if (value_type == EXESS_NOTHING) { + return strcmp(serd_node_string(literal), serd_node_string(bound)); + } + + ExessValue bound_value = {false}; + + const ExessVariableResult br = exess_read_value( + value_type, sizeof(bound_value), &bound_value, serd_node_string(bound)); + + if (br.status) { + return !!serd_logf_at(ctx->world, + SERD_LOG_LEVEL_ERROR, + serd_statement_caret(bound_statement), + "Failed to parse bound literal \"%s\" (%s)", + serd_node_string(bound), + exess_strerror(br.status)); + } + + ExessValue literal_value = {false}; + + const ExessVariableResult lr = exess_read_value(value_type, + sizeof(literal_value), + &literal_value, + serd_node_string(literal)); + + if (lr.status) { + return !!serd_logf_at(ctx->world, + SERD_LOG_LEVEL_ERROR, + serd_statement_caret(literal_statement), + "Failed to parse literal \"%s\" (%s)", + serd_node_string(literal), + exess_strerror(lr.status)); + } + + return exess_value_compare(value_type, + lr.write_count, + &literal_value, + value_type, + br.write_count, + &bound_value); +} + +static bool +less(const int cmp) +{ + return cmp < 0; +} + +static bool +less_equal(const int cmp) +{ + return cmp <= 0; +} + +static bool +greater(const int cmp) +{ + return cmp > 0; +} + +static bool +greater_equal(const int cmp) +{ + return cmp >= 0; +} + +static SerdStatus +check_bound(SerdValidator* const ctx, + const SerdValidatorCheck check, + const SerdStatement* const statement, + const SerdNode* const literal, + const SerdNode* const type, + const SerdNode* const restriction, + const SerdNode* const bound_property, + bool (*pred)(int), + const char* const message) +{ + SerdCursor* const b = + serd_model_find(ctx->model, restriction, bound_property, 0, 0); + + if (serd_cursor_is_end(b)) { + serd_cursor_free(b); + return SERD_SUCCESS; + } + + const SerdStatement* const bound_statement = + (const SerdStatement* SERD_NONNULL)serd_cursor_get(b); + + const SerdNode* const bound = serd_statement_object(bound_statement); + + const int cmp = + bound_cmp(ctx, statement, literal, type, bound_statement, bound); + + serd_cursor_free(b); + + return report_check(ctx, + statement, + check, + pred(cmp), + "Value \"%s\" %s \"%s\"", + serd_node_string(literal), + message, + serd_node_string(bound)); +} + +static RerexPattern* +parse_regex(const SerdWorld* const world, + const SerdStatement* const pattern_statement, + const char* const regex) +{ + const SerdCaret* const caret = + pattern_statement ? serd_statement_caret(pattern_statement) : NULL; + + RerexPattern* re = NULL; + size_t end = 0; + const RerexStatus st = rerex_compile(regex, &end, &re); + if (st) { + serd_logf_at(world, + SERD_LOG_LEVEL_ERROR, + caret, + "Error in pattern \"%s\" at offset %lu (%s)", + regex, + (unsigned long)end, + rerex_strerror(st)); + return NULL; + } + + return re; +} + +static bool +regex_match(SerdValidator* const ctx, + const SerdStatement* const pattern_statement, + const char* const regex, + const char* const str) +{ + RerexPattern* const re = parse_regex(ctx->world, pattern_statement, regex); + if (!re) { + return false; + } + + RerexMatcher* const matcher = rerex_new_matcher(re); + const bool ret = rerex_match(matcher, str); + + rerex_free_matcher(matcher); + rerex_free_pattern(re); + + return ret; +} + +static SerdStatus +check_literal_restriction(SerdValidator* const ctx, + const SerdStatement* const statement, + const SerdNode* const literal, + const SerdNode* const type, + const SerdNode* const restriction) +{ + SerdStatus st = SERD_SUCCESS; + + // Check xsd:pattern + const SerdStatement* const pat_statement = serd_model_get_statement( + ctx->model, restriction, ctx->uris.xsd_pattern, 0, 0); + if (pat_statement) { + const char* const str = serd_node_string(literal); + const SerdNode* const pat_node = serd_statement_object(pat_statement); + const char* const pat = serd_node_string(pat_node); + + st = merge_status(st, + report_check(ctx, + statement, + SERD_CHECK_LITERAL_PATTERN, + regex_match(ctx, pat_statement, pat, str), + "Value \"%s\" doesn't match pattern \"%s\"", + serd_node_string(literal), + pat)); + } + + // Check inclusive/exclusive min and max + + typedef bool (*BoundCmpPredicate)(int); + + typedef struct { + SerdValidatorCheck check_id; + const SerdNode* restriction_property; + BoundCmpPredicate pred; + const char* const message; + } BoundCheck; + + const BoundCheck bound_checks[] = { + {SERD_CHECK_LITERAL_MIN_INCLUSIVE, + ctx->uris.xsd_minInclusive, + greater_equal, + "<"}, + {SERD_CHECK_LITERAL_MAX_INCLUSIVE, + ctx->uris.xsd_maxInclusive, + less_equal, + ">"}, + {SERD_CHECK_LITERAL_MIN_EXCLUSIVE, + ctx->uris.xsd_minExclusive, + greater, + "<="}, + {SERD_CHECK_LITERAL_MAX_EXCLUSIVE, ctx->uris.xsd_maxExclusive, less, ">="}, + }; + + for (size_t i = 0; i < sizeof(bound_checks) / sizeof(BoundCheck); ++i) { + st = merge_status(st, + check_bound(ctx, + bound_checks[i].check_id, + statement, + literal, + type, + restriction, + bound_checks[i].restriction_property, + bound_checks[i].pred, + bound_checks[i].message)); + } + + return st; +} + +static bool +literal_is_valid(SerdValidator* const ctx, + const SerdStatement* const statement, + const SerdNode* const literal, + const SerdNode* const type) +{ + if (!type) { + return true; + } + + // Check that datatype is defined + const SerdNode* const node_datatype = serd_node_datatype(literal); + if (node_datatype && report_check(ctx, + statement, + SERD_CHECK_DATATYPE_TYPE, + serd_model_ask(ctx->model, + node_datatype, + ctx->uris.rdf_type, + ctx->uris.rdfs_Datatype, + NULL), + "Undefined datatype <%s>", + serd_node_string(node_datatype))) { + return false; + } + + const NodeQuote type_quote = format_node(ctx, type); + + const ExessDatatype value_type = + node_datatype ? exess_datatype_from_uri(serd_node_string(node_datatype)) + : EXESS_NOTHING; + + if (value_type != EXESS_NOTHING) { + /* Check if the literal parses correctly by measuring the canonical string. + This is better than trying to read a variant here, because it + automatically supports some unbounded datatypes like xsd:decimal and + xsd:base64Binary without needing to allocate space for the value. */ + + const ExessResult r = + exess_write_canonical(serd_node_string(literal), value_type, 0, NULL); + + if (report_check(ctx, + statement, + SERD_CHECK_LITERAL_VALUE, + r.status == EXESS_SUCCESS, + "Invalid xsd:%s literal \"%s\" (%s)", + serd_node_string(node_datatype) + sizeof(EXESS_XSD_URI) - + 1, + serd_node_string(literal), + exess_strerror(r.status))) { + return false; + } + } + + // Find restrictions list + const SerdNode* head = + serd_model_get(ctx->model, type, ctx->uris.owl_withRestrictions, 0, 0); + + // Walk list, checking each restriction + while (head) { + SerdCursor* const i_first = + serd_model_find(ctx->model, head, ctx->uris.rdf_first, 0, 0); + + if (serd_cursor_is_end(i_first)) { + serd_cursor_free(i_first); + break; + } + + const SerdStatement* const s_first = + (const SerdStatement* SERD_NONNULL)serd_cursor_get(i_first); + + const SerdNode* const first = serd_statement_object(s_first); + + // Check this restriction + if (check_literal_restriction(ctx, statement, literal, type, first)) { + log_note(ctx, + s_first, + SERD_CHECK_LITERAL_RESTRICTION, + "Restriction on datatype " QUOTE_FMT, + QUOTE_ARGS(type_quote)); + serd_cursor_free(i_first); + return false; + } + + // Seek to next list node + head = serd_model_get(ctx->model, head, ctx->uris.rdf_rest, 0, 0); + serd_cursor_free(i_first); + } + + // Recurse up datatype hierarchy + const SerdNode* const super = + serd_model_get(ctx->model, type, ctx->uris.owl_onDatatype, 0, 0); + + // FIXME: check for cycles + return super ? literal_is_valid(ctx, statement, literal, super) : true; +} + +static bool +is_a(SerdValidator* const ctx, + const SerdNode* const node, + const SerdNode* const type) +{ + if (serd_model_ask(ctx->model, node, ctx->uris.rdf_type, type, 0)) { + return true; // Instance explicitly has this type + } + + SerdCursor* const node_types = + serd_model_find(ctx->model, node, ctx->uris.rdf_type, NULL, NULL); + + SERD_FOREACH_NODE (SERD_OBJECT, node_type, node_types) { + if (is_subclass(ctx, node_type, type)) { + serd_cursor_free(node_types); + return true; // Instance explicitly has a subtype of this type + } + } + + serd_cursor_free(node_types); + return false; +} + +static SerdStatus +check_instance_union_type(SerdValidator* const ctx, + const SerdValidatorCheck check, + const SerdNode* const root_klass, + const SerdStatement* const statement, + const SerdNode* const instance, + const SerdNode* const klass) +{ + SerdStatus st = SERD_SUCCESS; + + // Check owl:unionOf + /* if (serd_node_type(klass) == SERD_BLANK) { */ + const SerdNode* const union_list = + serd_model_get(ctx->model, klass, ctx->uris.owl_unionOf, NULL, NULL); + + for (const SerdNode* l = union_list; l; + l = serd_model_get(ctx->model, l, ctx->uris.rdf_rest, NULL, NULL)) { + const SerdNode* const element = + serd_model_get(ctx->model, l, ctx->uris.rdf_first, NULL, NULL); + if (element) { + ctx->suppressed = true; + + st = check_instance_type( + ctx, check, root_klass, statement, instance, element); + + ctx->suppressed = false; + if (!st) { + return SERD_SUCCESS; + } + } + } + + if (union_list) { + return report_check(ctx, + statement, + check, + false, + "Instance " QUOTE_FMT " is not any type in union", + QUOTE_ARGS(format_node(ctx, instance))); + } + /* } */ + + return st; +} + +static SerdStatus +check_instance_super_types(SerdValidator* const ctx, + const SerdValidatorCheck check, + const SerdStatement* const statement, + const SerdNode* const instance, + const SerdNode* const klass) +{ + SerdCursor* const supers = + serd_model_find(ctx->model, klass, ctx->uris.rdfs_subClassOf, NULL, NULL); + + SERD_FOREACH_NODE (SERD_OBJECT, super, supers) { + if (!serd_node_equals(klass, super) && + !serd_node_equals(super, ctx->uris.rdfs_Class) && + !serd_node_equals(super, ctx->uris.owl_Class)) { + if (check_instance_type(ctx, check, klass, statement, instance, super)) { + if (serd_node_type(super) == SERD_URI) { + log_note(ctx, + serd_cursor_get(supers), + check, + "A " QUOTE_FMT " is a " QUOTE_FMT, + QUOTE_ARGS(format_node(ctx, klass)), + QUOTE_ARGS(format_node(ctx, super))); + } + + serd_cursor_free(supers); + return SERD_BAD_DATA; + } + } + } + serd_cursor_free(supers); + + return SERD_SUCCESS; +} + +static SerdStatus +check_instance_type(SerdValidator* const ctx, + const SerdValidatorCheck check, + const SerdNode* const root_klass, + const SerdStatement* const statement, + const SerdNode* const instance, + const SerdNode* const klass) +{ + SerdStatus st = SERD_SUCCESS; + + // Every instance is inherently a rdfs:Resource and owl:Thing + if (serd_node_equals(klass, ctx->uris.rdfs_Resource) || + serd_node_equals(klass, ctx->uris.owl_Thing)) { + return SERD_SUCCESS; + } + + // If the class is xsd:anyURI, check that the instance node is a URI + if (serd_node_equals(klass, ctx->uris.xsd_anyURI) || + is_subdatatype(ctx, klass, ctx->uris.xsd_anyURI)) { + return report_check(ctx, + statement, + SERD_CHECK_ANY_URI, + serd_node_type(instance) == SERD_URI, + "Node " QUOTE_FMT " isn't a URI", + QUOTE_ARGS(format_node(ctx, instance))); + } + + // Check that instance is not somehow also a rdfs:Literal or rdfs:Datatype + if (report_check(ctx, + statement, + SERD_CHECK_INSTANCE_LITERAL, + (!is_subclass(ctx, klass, ctx->uris.rdfs_Literal) && + !is_a(ctx, klass, ctx->uris.rdfs_Datatype)), + "Instance " QUOTE_FMT " isn't a literal", + QUOTE_ARGS(format_node(ctx, instance)))) { + return SERD_BAD_DATA; + } + + // Check the instance against the class restriction (if applicable) + if ((st = check_instance_restriction( + ctx, root_klass, statement, instance, klass))) { + return st; + } + + // Check the instance against each type in the union class (if applicable) + if ((st = check_instance_union_type( + ctx, check, root_klass, statement, instance, klass))) { + return st; + } + + // Check the instance against any superclasses of the class + if ((st = + check_instance_super_types(ctx, check, statement, instance, klass))) { + return st; + } + + // No contradictions, so succeed if the instance explicitly has this type + if (is_a(ctx, instance, klass)) { + return SERD_SUCCESS; + } + + if ((ctx->checks & (1ul << SERD_CHECK_EXPLICIT_INSTANCE_TYPE)) && + serd_node_type(instance) != SERD_BLANK) { + return report_check(ctx, + statement, + SERD_CHECK_EXPLICIT_INSTANCE_TYPE, + false, + "Instance " QUOTE_FMT + " does not explicitly have type " QUOTE_FMT, + QUOTE_ARGS(format_node(ctx, instance)), + QUOTE_ARGS(format_node(ctx, klass))); + } + + return SERD_SUCCESS; +} + +static SerdStatus +check_type(SerdValidator* const ctx, + const SerdValidatorCheck check, + const SerdStatement* const statement, + const SerdNode* const node, + const SerdNode* const type) +{ + const NodeQuote type_quote = format_node(ctx, type); + + // Everything is an rdfs:Resource + if (serd_node_equals(type, ctx->uris.rdfs_Resource)) { + return SERD_SUCCESS; + } + + switch (serd_node_type(node)) { + case SERD_LITERAL: + // Every literal is an rdfs:Literal + if (serd_node_equals(type, ctx->uris.rdfs_Literal)) { + return SERD_SUCCESS; + } + + // A plain literal can not have a datatype + if (serd_node_equals(type, ctx->uris.rdf_PlainLiteral)) { + if (report_check(ctx, + statement, + SERD_CHECK_PLAIN_LITERAL_DATATYPE, + !serd_node_datatype(node), + "Typed literal \"%s\" isn't a plain literal", + serd_node_string(node))) { + return SERD_BAD_DATA; + } + } else if (report_check(ctx, + statement, + SERD_CHECK_LITERAL_INSTANCE, + is_a(ctx, type, ctx->uris.rdfs_Datatype), + "Literal \"%s\" isn't an instance of " QUOTE_FMT, + serd_node_string(node), + QUOTE_ARGS(type_quote))) { + return SERD_BAD_DATA; + } + + return literal_is_valid(ctx, statement, node, type) ? SERD_SUCCESS + : SERD_BAD_DATA; + + case SERD_URI: + if (serd_node_equals(type, ctx->uris.xsd_anyURI)) { + return SERD_SUCCESS; + } + break; + + case SERD_BLANK: + case SERD_VARIABLE: + break; + } + + return check_instance_type(ctx, check, type, statement, node, type); +} + +static Count +count_non_blanks(SerdCursor* const i, const SerdField field) +{ + Count n = 0u; + SERD_FOREACH (s, i) { + const SerdNode* node = serd_statement_node(s, field); + if (node && serd_node_type(node) != SERD_BLANK) { + ++n; + } + } + return n; +} + +static SerdStatus +check_cardinality_restriction(SerdValidator* const ctx, + const SerdNode* const root_klass, + const SerdNode* const restriction, + const SerdStatement* const statement, + const SerdNode* const instance) +{ + const SerdNode* const prop = serd_model_get( + ctx->model, restriction, ctx->uris.owl_onProperty, NULL, NULL); + + const SerdStatement* const equal_statement = serd_model_get_statement( + ctx->model, restriction, ctx->uris.owl_cardinality, NULL, NULL); + + const SerdStatement* const min_statement = serd_model_get_statement( + ctx->model, restriction, ctx->uris.owl_minCardinality, NULL, NULL); + + const SerdStatement* const max_statement = serd_model_get_statement( + ctx->model, restriction, ctx->uris.owl_maxCardinality, NULL, NULL); + + if (!equal_statement && !min_statement && !max_statement) { + return SERD_SUCCESS; + } + + const NodeQuote prop_quote = format_node(ctx, prop); + const NodeQuote klass_quote = format_node(ctx, root_klass); + + SerdStatus st = SERD_SUCCESS; + const Count n_values = + (Count)serd_model_count(ctx->model, instance, prop, NULL, NULL); + + // Check owl:cardinality + if (equal_statement) { + const SerdNode* card = serd_statement_object(equal_statement); + const Count expected = strtoul(serd_node_string(card), NULL, 10); + if ((st = report_check(ctx, + statement, + SERD_CHECK_CARDINALITY_EQUAL, + n_values == expected, + "Instance " QUOTE_FMT " has %lu " QUOTE_FMT + " properties", + QUOTE_ARGS(format_node(ctx, instance)), + n_values, + QUOTE_ARGS(prop_quote)))) { + log_note(ctx, + equal_statement, + SERD_CHECK_CARDINALITY_EQUAL, + "A " QUOTE_FMT " must have exactly %lu", + QUOTE_ARGS(klass_quote), + expected); + return st; + } + } + + // Check owl:minCardinality + if (min_statement) { + const SerdNode* card = serd_statement_object(min_statement); + const Count n_min = strtoul(serd_node_string(card), NULL, 10); + if ((st = report_check(ctx, + statement, + SERD_CHECK_CARDINALITY_MIN, + n_values >= n_min, + "Instance " QUOTE_FMT " has %lu " QUOTE_FMT + " properties", + QUOTE_ARGS(format_node(ctx, instance)), + n_values, + QUOTE_ARGS(prop_quote)))) { + log_note(ctx, + min_statement, + SERD_CHECK_CARDINALITY_MIN, + "A " QUOTE_FMT " must have at least %lu", + QUOTE_ARGS(klass_quote), + n_min); + return st; + } + } + + // Check owl:maxCardinality + if (max_statement) { + const SerdNode* const card = serd_statement_object(max_statement); + const Count n_max = strtoul(serd_node_string(card), NULL, 10); + if ((st = report_check(ctx, + statement, + SERD_CHECK_CARDINALITY_MAX, + n_values <= n_max, + "Instance " QUOTE_FMT " has %lu " QUOTE_FMT + " properties", + QUOTE_ARGS(format_node(ctx, instance)), + n_values, + QUOTE_ARGS(prop_quote)))) { + log_note(ctx, + max_statement, + SERD_CHECK_CARDINALITY_MAX, + "A " QUOTE_FMT " must have at most %lu", + QUOTE_ARGS(klass_quote), + n_max); + return st; + } + } + + return st; +} + +static SerdStatus +check_property_value_restriction(SerdValidator* const ctx, + const SerdNode* const root_klass, + const SerdNode* const restriction, + const SerdStatement* const statement, + const SerdNode* const instance) +{ + SerdStatus st = SERD_SUCCESS; + + const SerdNode* const prop = serd_model_get( + ctx->model, restriction, ctx->uris.owl_onProperty, NULL, NULL); + + const SerdStatement* const all_statement = serd_model_get_statement( + ctx->model, restriction, ctx->uris.owl_allValuesFrom, NULL, NULL); + + const SerdStatement* const some_statement = serd_model_get_statement( + ctx->model, restriction, ctx->uris.owl_someValuesFrom, NULL, NULL); + + if (!all_statement && !some_statement) { + return SERD_SUCCESS; + } + + const NodeQuote prop_quote = format_node(ctx, prop); + const NodeQuote klass_quote = format_node(ctx, root_klass); + + SerdCursor* const values = + serd_model_find(ctx->model, instance, prop, NULL, NULL); + + if (all_statement) { + const SerdNode* const type = serd_statement_object(all_statement); + const NodeQuote type_quote = format_node(ctx, type); + SERD_FOREACH (v, values) { + const SerdNode* const value = serd_statement_object(v); + const SerdStatus all_st = report_check( + ctx, + v, + SERD_CHECK_ALL_VALUES_FROM, + !check_type(ctx, SERD_CHECK_ALL_VALUES_FROM, v, value, type), + "Value isn't a " QUOTE_FMT, + QUOTE_ARGS(type_quote)); + + if (all_st) { + st = merge_status(st, all_st); + log_note(ctx, + all_statement, + SERD_CHECK_ALL_VALUES_FROM, + "Required for any " QUOTE_FMT " of a " QUOTE_FMT, + QUOTE_ARGS(prop_quote), + QUOTE_ARGS(klass_quote)); + } + } + } + + if (some_statement) { + const SerdNode* const type = serd_statement_object(some_statement); + const NodeQuote type_quote = format_node(ctx, type); + + // Search for some value with the required type + bool found = false; + { + ctx->suppressed = true; + SERD_FOREACH_NODE (SERD_OBJECT, value, values) { + if (!check_type( + ctx, SERD_CHECK_SOME_VALUES_FROM, statement, value, type)) { + found = true; + break; + } + } + ctx->suppressed = false; + } + + assert(!ctx->suppressed); + const SerdStatus some_st = + report_check(ctx, + statement, + SERD_CHECK_SOME_VALUES_FROM, + found, + QUOTE_FMT " has no " QUOTE_FMT " that is a " QUOTE_FMT, + QUOTE_ARGS(format_node(ctx, instance)), + QUOTE_ARGS(prop_quote), + QUOTE_ARGS(type_quote)); + + if (some_st && (ctx->checks & (1ull << SERD_CHECK_SOME_VALUES_FROM))) { + log_note(ctx, + some_statement, + SERD_CHECK_SOME_VALUES_FROM, + "An instance of " QUOTE_FMT " must have at least 1", + QUOTE_ARGS(klass_quote)); + } + + st = merge_status(st, some_st); + } + + serd_cursor_free(values); + + return st; +} + +static SerdStatus +check_instance_restriction(SerdValidator* const ctx, + const SerdNode* const root_klass, + const SerdStatement* const statement, + const SerdNode* const instance, + const SerdNode* const restriction) +{ + SerdStatus st = SERD_SUCCESS; + + st = merge_status(st, + check_cardinality_restriction( + ctx, root_klass, restriction, statement, instance)); + + st = merge_status(st, + check_property_value_restriction( + ctx, root_klass, restriction, statement, instance)); + + return st; +} + +/* Top-Level Checks */ + +static SerdStatus +check_class_label(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + const URIs* const uris = &ctx->uris; + SerdStatus st = SERD_SUCCESS; + + // For each rdfs:Class + SerdCursor* const klasses = + serd_model_find(model, NULL, uris->rdf_type, uris->rdfs_Class, ctx->graph); + SERD_FOREACH (k, klasses) { + const SerdNode* const klass = serd_statement_subject(k); + + // Check that it has an rdfs:label in the same graph + st = merge_status( + st, + report_check( + ctx, + k, + SERD_CHECK_CLASS_LABEL, + serd_model_ask(ctx->model, klass, uris->rdfs_label, 0, ctx->graph), + "Class <%s> has no label", + serd_node_string(klass))); + } + serd_cursor_free(klasses); + + return st; +} + +static SerdStatus +check_datatype_property(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + const URIs* uris = &ctx->uris; + SerdStatus st = SERD_SUCCESS; + + // For each owl:DatatypeProperty + SerdCursor* const properties = serd_model_find( + model, NULL, uris->rdf_type, uris->owl_DatatypeProperty, NULL); + SERD_FOREACH (p, properties) { + const SerdNode* const prop = serd_statement_subject(p); + const NodeQuote prop_quote = format_node(ctx, prop); + + // For each statement of this property in the target graph + SerdCursor* const statements = + serd_model_find(model, NULL, prop, NULL, ctx->graph); + SERD_FOREACH (s, statements) { + const SerdNode* const object = serd_statement_object(s); + + // Check that the object is a literal + if ((st = report_check(ctx, + s, + SERD_CHECK_DATATYPE_PROPERTY, + serd_node_type(object) == SERD_LITERAL, + QUOTE_FMT " isn't a literal", + QUOTE_ARGS(format_node(ctx, object))))) { + log_note(ctx, + p, + SERD_CHECK_DATATYPE_PROPERTY, + "A " QUOTE_FMT " must be a literal", + QUOTE_ARGS(prop_quote)); + } + } + serd_cursor_free(statements); + } + serd_cursor_free(properties); + + return st; +} + +static SerdStatus +check_deprecated(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For each deprecated thing + SerdCursor* const things = serd_model_find( + model, NULL, ctx->uris.owl_deprecated, ctx->true_node, NULL); + SERD_FOREACH (t, things) { + const SerdNode* const thing = serd_statement_subject(t); + const NodeQuote thing_quote = format_node(ctx, thing); + + if (is_a(ctx, thing, ctx->uris.rdf_Property)) { + // For each statement of this property in the target graph + SerdCursor* const statements = + serd_model_find(model, NULL, thing, NULL, ctx->graph); + SERD_FOREACH (s, statements) { + st = report_check(ctx, + s, + SERD_CHECK_DEPRECATED_PROPERTY, + false, + "Use of deprecated property"); + log_note(ctx, + t, + SERD_CHECK_DEPRECATED_PROPERTY, + "Property " QUOTE_FMT " is deprecated", + QUOTE_ARGS(thing_quote)); + } + serd_cursor_free(statements); + + } else if (is_a(ctx, thing, ctx->uris.rdfs_Class)) { + // For each explicit instance of this class in the target graph + SerdCursor* const statements = + serd_model_find(model, NULL, ctx->uris.rdf_type, thing, ctx->graph); + SERD_FOREACH (s, statements) { + st = report_check(ctx, + s, + SERD_CHECK_DEPRECATED_CLASS, + false, + "Instance of deprecated class"); + log_note(ctx, + t, + SERD_CHECK_DEPRECATED_CLASS, + "Class " QUOTE_FMT " is deprecated", + QUOTE_ARGS(thing_quote)); + } + serd_cursor_free(statements); + } + } + serd_cursor_free(things); + + return st; +} + +static SerdStatus +check_functional_property(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + const URIs* uris = &ctx->uris; + SerdStatus st = SERD_SUCCESS; + + // For each owl:FunctionalProperty + SerdCursor* const properties = serd_model_find( + model, NULL, uris->rdf_type, uris->owl_FunctionalProperty, NULL); + SERD_FOREACH (p, properties) { + const SerdNode* const prop = serd_statement_subject(p); + const NodeQuote prop_quote = format_node(ctx, prop); + + const SerdNode* last_subj = NULL; + + // For each instance with this property in the target graph + SerdCursor* const statements = + serd_model_find(model, NULL, prop, NULL, ctx->graph); + SERD_FOREACH (s, statements) { + const SerdNode* const subj = serd_statement_subject(s); + if (serd_node_equals(subj, last_subj)) { + continue; + } + + // Count the number of values on this instance + SerdCursor* const o = + serd_model_find(ctx->model, subj, prop, NULL, ctx->graph); + const Count n = count_non_blanks(o, SERD_OBJECT); + + serd_cursor_free(o); + if (report_check(ctx, + s, + SERD_CHECK_FUNCTIONAL_PROPERTY, + n <= 1, + "Instance has %lu " QUOTE_FMT " properties", + n, + QUOTE_ARGS(prop_quote))) { + st = SERD_BAD_DATA; + log_note(ctx, + p, + SERD_CHECK_FUNCTIONAL_PROPERTY, + "An instance may have at most 1"); + } + + last_subj = subj; + } + serd_cursor_free(statements); + } + serd_cursor_free(properties); + + return st; +} + +// FIXME: name +static SerdStatus +check_instance(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + const URIs* uris = &ctx->uris; + SerdStatus st = SERD_SUCCESS; + + // For each rdf:type property in the target graph + SerdCursor* const types = + serd_model_find(model, NULL, uris->rdf_type, NULL, ctx->graph); + SERD_FOREACH (t, types) { + const SerdNode* const instance = serd_statement_subject(t); + const SerdNode* const type = serd_statement_object(t); + const NodeQuote type_quote = format_node(ctx, type); + + if ((st = check_instance_type( + ctx, SERD_CHECK_INSTANCE_TYPE, type, t, instance, type))) { + log_note(ctx, + t, + SERD_CHECK_INSTANCE_TYPE, + "Instance is explicitly a " QUOTE_FMT, + QUOTE_ARGS(type_quote)); + break; + } + } + serd_cursor_free(types); + + return st; +} + +static SerdStatus +check_inverse_functional_property(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + const URIs* uris = &ctx->uris; + SerdStatus st = SERD_SUCCESS; + + // For each owl:InverseFunctionalProperty + SerdCursor* const properties = serd_model_find( + model, NULL, uris->rdf_type, uris->owl_InverseFunctionalProperty, NULL); + SERD_FOREACH (p, properties) { + const SerdNode* const prop = serd_statement_subject(p); + const NodeQuote prop_quote = format_node(ctx, prop); + + const SerdNode* last_obj = NULL; + + // For each value of this property in the target graph + SerdCursor* const statements = + serd_model_find(model, NULL, prop, NULL, ctx->graph); + SERD_FOREACH (statement, statements) { + const SerdNode* const obj = serd_statement_object(statement); + const NodeQuote obj_quote = format_node(ctx, obj); + if (serd_node_equals(obj, last_obj)) { + continue; + } + + // Count the number of subjects with this value in the target graph + SerdCursor* s = serd_model_find(ctx->model, NULL, prop, obj, ctx->graph); + const Count n = count_non_blanks(s, SERD_SUBJECT); + + if (n > 1) { + // Get the range again so we can print a note for every value + serd_cursor_free(s); + s = serd_model_find(ctx->model, NULL, prop, obj, ctx->graph); + + SERD_FOREACH (value_statement, s) { + const SerdNode* const subj = serd_statement_subject(value_statement); + const NodeQuote subj_quote = format_node(ctx, subj); + + report_check(ctx, + value_statement, + SERD_CHECK_INVERSE_FUNCTIONAL_PROPERTY, + false, + "Instance " QUOTE_FMT " shares the " QUOTE_FMT + " " QUOTE_FMT, + QUOTE_ARGS(subj_quote), + QUOTE_ARGS(prop_quote), + QUOTE_ARGS(obj_quote)); + } + + log_note(ctx, + p, + SERD_CHECK_INVERSE_FUNCTIONAL_PROPERTY, + "At most 1 instance may have a given " QUOTE_FMT, + QUOTE_ARGS(prop_quote)); + } + + serd_cursor_free(s); + last_obj = obj; + } + serd_cursor_free(statements); + } + serd_cursor_free(properties); + + return st; +} + +static SerdStatus +check_object_property(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For each owl:ObjectProperty + SerdCursor* const properties = serd_model_find( + model, NULL, ctx->uris.rdf_type, ctx->uris.owl_ObjectProperty, NULL); + SERD_FOREACH_NODE (SERD_SUBJECT, prop, properties) { + const NodeQuote prop_quote = format_node(ctx, prop); + + // For each statement of this property in the target graph + SerdCursor* const statements = + serd_model_find(model, NULL, prop, NULL, ctx->graph); + SERD_FOREACH (s, statements) { + if (report_check(ctx, + s, + SERD_CHECK_OBJECT_PROPERTY, + serd_node_type(serd_statement_object(s)) != SERD_LITERAL, + "Object property has literal value")) { + st = SERD_BAD_DATA; + log_note(ctx, + serd_cursor_get(properties), + SERD_CHECK_OBJECT_PROPERTY, + "A " QUOTE_FMT " must be an instance", + QUOTE_ARGS(prop_quote)); + } + } + serd_cursor_free(statements); + } + serd_cursor_free(properties); + + return st; +} + +static SerdStatus +check_property_domain(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For each property with an rdfs:domain + SerdCursor* const properties = + serd_model_find(model, NULL, ctx->uris.rdfs_domain, NULL, NULL); + SERD_FOREACH (p, properties) { + const SerdNode* const prop = serd_statement_subject(p); + const NodeQuote prop_quote = format_node(ctx, prop); + const SerdNode* const domain = serd_statement_object(p); + const NodeQuote domain_quote = format_node(ctx, domain); + + // For each statement of this property in the target graph + SerdCursor* const statements = + serd_model_find(model, NULL, prop, NULL, ctx->graph); + SERD_FOREACH (statement, statements) { + const SerdNode* const subj = serd_statement_subject(statement); + + // Check that the subject is in the domain + if (check_instance_type( + ctx, SERD_CHECK_PROPERTY_DOMAIN, domain, statement, subj, domain)) { + log_note(ctx, + p, + SERD_CHECK_PROPERTY_DOMAIN, + "An instance with a " QUOTE_FMT " must be a " QUOTE_FMT, + QUOTE_ARGS(prop_quote), + QUOTE_ARGS(domain_quote)); + } + } + serd_cursor_free(statements); + } + serd_cursor_free(properties); + + return st; +} + +static SerdStatus +check_property_label(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For each ?property a rdf:Property in the target graph + SerdCursor* const properties = serd_model_find( + model, NULL, ctx->uris.rdf_type, ctx->uris.rdf_Property, ctx->graph); + SERD_FOREACH (p, properties) { + const SerdNode* const property = serd_statement_subject(p); + + update_status( + &st, + report_check(ctx, + p, + SERD_CHECK_PROPERTY_LABEL, + serd_model_ask( + ctx->model, property, ctx->uris.rdfs_label, 0, ctx->graph), + "Property <%s> has no label", + serd_node_string(property))); + } + serd_cursor_free(properties); + + return st; +} + +static SerdStatus +check_property_range(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For each property with an rdfs:range + SerdCursor* const properties = + serd_model_find(model, NULL, ctx->uris.rdfs_range, NULL, NULL); + SERD_FOREACH (p, properties) { + const SerdNode* const prop = serd_statement_subject(p); + const SerdNode* const klass = serd_statement_object(p); + const NodeQuote prop_quote = format_node(ctx, prop); + + // For each statement of this property in the target graph + SerdCursor* const statements = + serd_model_find(model, NULL, prop, NULL, ctx->graph); + SERD_FOREACH (statement, statements) { + const SerdNode* const obj = serd_statement_object(statement); + + // Check that the object is in the range + const SerdStatus range_st = + check_type(ctx, SERD_CHECK_PROPERTY_RANGE, statement, obj, klass); + if (range_st) { + log_note(ctx, + p, + SERD_CHECK_PROPERTY_RANGE, + "Required for any " QUOTE_FMT " value", + QUOTE_ARGS(prop_quote)); + + st = st ? st : range_st; + } + } + serd_cursor_free(statements); + } + serd_cursor_free(properties); + + return st; +} + +static SerdStatus +check_predicate_type(SerdValidator* const ctx) +{ + // For each predicate + SerdStatus st = SERD_SUCCESS; + const SerdNode* last_pred = NULL; + // FIXME: graph + SerdCursor* const all = serd_model_begin_ordered(ctx->model, SERD_ORDER_POS); + SERD_FOREACH (s, all) { + const SerdNode* const g = serd_statement_graph(s); + const SerdNode* const pred = serd_statement_predicate(s); + if (serd_node_equals(pred, last_pred) || + (ctx->graph && g && !serd_node_equals(ctx->graph, g))) { + continue; + } + + const bool defined = serd_model_ask(ctx->model, pred, NULL, NULL, NULL); + + st = merge_status(st, + report_check(ctx, + s, + SERD_CHECK_PREDICATE_TYPE, + defined, + "Undefined property <%s>", + serd_node_string(pred))); + + if (defined) { + st = merge_status( + st, + report_check( + ctx, + s, + SERD_CHECK_PREDICATE_TYPE, + serd_model_ask(ctx->model, pred, ctx->uris.rdf_type, NULL, NULL) && + is_a(ctx, pred, ctx->uris.rdf_Property), + "<%s> isn't a property", + serd_node_string(pred))); + } + + last_pred = pred; + } + serd_cursor_free(all); + + return st; +} + +static SerdStatus +check_acyclic(SerdValidator* const ctx, + const SerdValidatorCheck check, + const SerdNode* const root, + const SerdNode* const node, + const SerdNode* const property, + const char* const fmt) +{ + SerdStatus st = SERD_SUCCESS; + + // FIXME: graph + SerdCursor* const links = + serd_model_find(ctx->model, node, property, NULL, NULL); + SERD_FOREACH (l, links) { + const SerdNode* const object = serd_statement_object(l); + const NodeQuote object_quote = format_node(ctx, object); + + if ((st = report_check(ctx, + l, + check, + !serd_node_equals(object, root), + fmt, + QUOTE_ARGS(object_quote)))) { + break; + } + + if ((st = check_acyclic(ctx, check, root, object, property, fmt))) { + log_note(ctx, l, check, "Via " QUOTE_FMT, QUOTE_ARGS(object_quote)); + break; + } + } + serd_cursor_free(links); + + return st; +} + +static SerdStatus +check_subclass_cycle(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For each subclass + SerdCursor* const properties = + serd_model_find(model, NULL, ctx->uris.rdfs_subClassOf, NULL, NULL); + SERD_FOREACH_NODE (SERD_SUBJECT, root, properties) { + st = merge_status(st, + check_acyclic(ctx, + SERD_CHECK_CLASS_CYCLE, + root, + root, + ctx->uris.rdfs_subClassOf, + "Class " QUOTE_FMT + " is a sub-class of itself")); + } + serd_cursor_free(properties); + + return st; +} + +static SerdStatus +check_ondatatype_cycle(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For refined datatype + SerdCursor* const properties = + serd_model_find(model, NULL, ctx->uris.owl_onDatatype, NULL, NULL); + SERD_FOREACH_NODE (SERD_SUBJECT, root, properties) { + st = merge_status(st, + check_acyclic(ctx, + SERD_CHECK_DATATYPE_CYCLE, + root, + root, + ctx->uris.owl_onDatatype, + "Class " QUOTE_FMT + " is a sub-datatype of itself")); + } + serd_cursor_free(properties); + + return st; +} + +static SerdStatus +check_subproperty_cycle(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For each subproperty relation + SerdCursor* const properties = + serd_model_find(model, NULL, ctx->uris.rdfs_subPropertyOf, NULL, NULL); + SERD_FOREACH_NODE (SERD_SUBJECT, root, properties) { + st = merge_status(st, + check_acyclic(ctx, + SERD_CHECK_PROPERTY_CYCLE, + root, + root, + ctx->uris.rdfs_subPropertyOf, + "Property " QUOTE_FMT + " is a sub-property of itself")); + } + serd_cursor_free(properties); + + return st; +} + +/* Statement Checks */ + +static SerdStatus +statement_check_valid_literal(SerdValidator* const ctx, + const SerdStatement* const statement) +{ + const SerdNode* const object = serd_statement_object(statement); + if (serd_node_type(object) != SERD_LITERAL) { + return SERD_SUCCESS; + } + + if (!literal_is_valid(ctx, statement, object, serd_node_datatype(object))) { + /* log_note(ctx, l, check, "Via " QUOTE_FMT, QUOTE_ARGS(object_string)); */ + + return SERD_BAD_DATA; + } + + return SERD_SUCCESS; +} + +/* Entry Points */ + +SerdValidator* +serd_validator_new(SerdWorld* const world) +{ + assert(world); + + SerdValidator* const validator = + (SerdValidator*)serd_wcalloc(world, 1, sizeof(SerdValidator)); + + if (!validator) { + return NULL; + } + + SerdNodes* const nodes = serd_world_nodes(world); + + validator->world = world; + validator->true_node = serd_nodes_value(nodes, serd_bool(true)); + +#define URI(prefix, suffix) \ + validator->uris.prefix##_##suffix = \ + serd_nodes_uri(nodes, SERD_STRING(NS_##prefix #suffix)) + + URI(owl, Class); + URI(owl, DatatypeProperty); + URI(owl, DeprecatedClass); + URI(owl, DeprecatedProperty); + URI(owl, FunctionalProperty); + URI(owl, InverseFunctionalProperty); + URI(owl, ObjectProperty); + URI(owl, Restriction); + URI(owl, Thing); + URI(owl, allValuesFrom); + URI(owl, cardinality); + URI(owl, deprecated); + URI(owl, equivalentClass); + URI(owl, maxCardinality); + URI(owl, minCardinality); + URI(owl, onDatatype); + URI(owl, onProperty); + URI(owl, someValuesFrom); + URI(owl, unionOf); + URI(owl, withRestrictions); + URI(rdf, PlainLiteral); + URI(rdf, Property); + URI(rdf, XMLLiteral); + URI(rdf, first); + URI(rdf, rest); + URI(rdf, type); + URI(rdfs, Class); + URI(rdfs, Datatype); + URI(rdfs, Literal); + URI(rdfs, Resource); + URI(rdfs, domain); + URI(rdfs, label); + URI(rdfs, range); + URI(rdfs, subClassOf); + URI(rdfs, subPropertyOf); + URI(xsd, anyURI); + URI(xsd, maxExclusive); + URI(xsd, maxInclusive); + URI(xsd, minExclusive); + URI(xsd, minInclusive); + URI(xsd, pattern); + +#undef URI + + return validator; +} + +void +serd_validator_free(SerdValidator* const validator) +{ + serd_wfree(validator->world, validator); +} + +SerdStatus +serd_validator_enable_check(SerdValidator* const validator, + const SerdValidatorCheck check) +{ + assert(validator); + + validator->checks |= (SerdValidatorChecks)(1ull << check); + return SERD_SUCCESS; +} + +SerdStatus +serd_validator_disable_check(SerdValidator* const validator, + const SerdValidatorCheck check) +{ + assert(validator); + + validator->checks &= ~((SerdValidatorChecks)(1ull << check)); + return SERD_SUCCESS; +} + +static SerdStatus +for_each_matching_check(SerdValidator* const validator, + const char* const regex, + SerdStatus (*function)(SerdValidator*, + SerdValidatorCheck)) +{ + RerexPattern* const re = parse_regex(validator->world, NULL, regex); + if (!re) { + return SERD_BAD_ARG; + } + + bool matched = false; + RerexMatcher* matcher = rerex_new_matcher(re); + + for (uint64_t i = 0; i < N_CHECKS; ++i) { + if (rerex_match(matcher, check_names[i])) { + function(validator, (SerdValidatorCheck)i); + matched = true; + } + } + + rerex_free_matcher(matcher); + rerex_free_pattern(re); + + return matched ? SERD_SUCCESS : SERD_FAILURE; +} + +SerdStatus +serd_validator_enable_checks(SerdValidator* const validator, + const char* const regex) +{ + assert(validator); + assert(regex); + + if (!strcmp(regex, "all")) { + validator->checks = + ~((SerdValidatorChecks)(1ull << SERD_CHECK_EXPLICIT_INSTANCE_TYPE)); + return SERD_SUCCESS; + } + + if (!strcmp(regex, "everything")) { + validator->checks = ~0u; + return SERD_SUCCESS; + } + + return for_each_matching_check(validator, regex, serd_validator_enable_check); +} + +SerdStatus +serd_validator_disable_checks(SerdValidator* const validator, + const char* const regex) +{ + assert(validator); + assert(regex); + + if (!strcmp(regex, "all")) { + validator->checks = 0u; + return SERD_SUCCESS; + } + + return for_each_matching_check( + validator, regex, serd_validator_disable_check); +} + +static SerdStatus +check_literals(SerdValidator* const ctx) +{ + SerdStatus st = SERD_SUCCESS; + + if (ctx->checks & ((1ul << SERD_CHECK_DATATYPE_TYPE) | // + (1ul << SERD_CHECK_LITERAL_INSTANCE) | + (1ul << SERD_CHECK_LITERAL_MAX_EXCLUSIVE) | + (1ul << SERD_CHECK_LITERAL_MAX_INCLUSIVE) | + (1ul << SERD_CHECK_LITERAL_MIN_EXCLUSIVE) | + (1ul << SERD_CHECK_LITERAL_MIN_INCLUSIVE) | + (1ul << SERD_CHECK_LITERAL_PATTERN) | + (1ul << SERD_CHECK_LITERAL_RESTRICTION) | + (1ul << SERD_CHECK_LITERAL_VALUE))) { + SerdCursor* const all = + serd_model_begin_ordered(ctx->model, SERD_ORDER_SPO); + SERD_FOREACH (statement, all) { + update_status(&st, statement_check_valid_literal(ctx, statement)); + } + serd_cursor_free(all); + } + + return st; +} + +SerdStatus +serd_validate(SerdValidator* const validator, + const SerdModel* const model, + const SerdNode* const graph, + const SerdEnv* const env) +{ + assert(validator); + assert(model); + + SerdValidator* const ctx = validator; + SerdStatus st = SERD_SUCCESS; + + ctx->env = env; + ctx->model = model; + ctx->graph = graph; + + /* Check class/datatype cycles first, so we can give up early if any are + found. Dealing with non-trivial cycles everywhere makes some other checks + too complicated, and would likely just make the output more confusing + anyway. */ + + if (ctx->checks & (1ul << SERD_CHECK_CLASS_CYCLE)) { + update_status(&st, check_subclass_cycle(ctx)); + } + + if (ctx->checks & (1ul << SERD_CHECK_DATATYPE_CYCLE)) { + update_status(&st, check_ondatatype_cycle(ctx)); + } + + if (ctx->checks & (1ul << SERD_CHECK_PROPERTY_CYCLE)) { + update_status(&st, check_subproperty_cycle(ctx)); + } + + if (st) { + serd_logf( + ctx->world, + SERD_LOG_LEVEL_NOTICE, + "Recursive type or property definition found, aborting further checks"); + return st; + } + + /* No dangerous cycles that might hang the validator, so proceed with normal + checks. */ + + if (ctx->checks & (1ul << SERD_CHECK_PREDICATE_TYPE)) { + update_status(&st, check_predicate_type(ctx)); + } + + if (ctx->checks & (1ul << SERD_CHECK_CLASS_LABEL)) { + update_status(&st, check_class_label(ctx)); + } + + if (ctx->checks & (1ul << SERD_CHECK_DATATYPE_PROPERTY)) { + update_status(&st, check_datatype_property(ctx)); + } + + if (ctx->checks & ((1ul << SERD_CHECK_DEPRECATED_PROPERTY) | + (1ul << SERD_CHECK_DEPRECATED_CLASS))) { + update_status(&st, check_deprecated(ctx)); + } + + if (ctx->checks & (1ul << SERD_CHECK_FUNCTIONAL_PROPERTY)) { + update_status(&st, check_functional_property(ctx)); + } + + if (ctx->checks & (1ul << SERD_CHECK_INSTANCE_TYPE)) { + update_status(&st, check_instance(ctx)); + } + + if (ctx->checks & (1ul << SERD_CHECK_INVERSE_FUNCTIONAL_PROPERTY)) { + update_status(&st, check_inverse_functional_property(ctx)); + } + + if (ctx->checks & (1ul << SERD_CHECK_OBJECT_PROPERTY)) { + update_status(&st, check_object_property(ctx)); + } + + if (ctx->checks & (1ul << SERD_CHECK_PROPERTY_DOMAIN)) { + update_status(&st, check_property_domain(ctx)); + } + + if (ctx->checks & (1ul << SERD_CHECK_PROPERTY_LABEL)) { + update_status(&st, check_property_label(ctx)); + } + + if (ctx->checks & (1ul << SERD_CHECK_PROPERTY_RANGE)) { + update_status(&st, check_property_range(ctx)); + } + + update_status(&st, check_literals(ctx)); + + ctx->graph = NULL; + + if (ctx->n_errors > 0) { + serd_logf(ctx->world, + SERD_LOG_LEVEL_ERROR, + "Failed %u of %u validation checks", + ctx->n_errors, + ctx->n_checks); + + return SERD_BAD_DATA; + } + + serd_logf(ctx->world, + SERD_LOG_LEVEL_INFO, + "Passed all %u validation checks", + ctx->n_checks); + + return SERD_SUCCESS; +} |