diff options
author | David Robillard <d@drobilla.net> | 2021-03-08 23:25:35 -0500 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2021-03-09 01:43:52 -0500 |
commit | 7b954f5667e82de1b64984a9aeb26b8ebb5cab81 (patch) | |
tree | 5668f80ce2dc7a52cf66bbe2f4e4429b18f09e08 /src | |
parent | c579186c5dd4e11bffddd353cef8978a66ef9c10 (diff) | |
download | serd1-meson.tar.gz serd1-meson.tar.bz2 serd1-meson.zip |
WIP: Validationserd1-meson
Diffstat (limited to 'src')
-rw-r--r-- | src/iter.c | 12 | ||||
-rw-r--r-- | src/iter.h | 56 | ||||
-rw-r--r-- | src/macros.h | 1 | ||||
-rw-r--r-- | src/model.c | 111 | ||||
-rw-r--r-- | src/n3.c | 5 | ||||
-rw-r--r-- | src/namespaces.h | 4 | ||||
-rw-r--r-- | src/serdi.c | 64 | ||||
-rw-r--r-- | src/string.c | 4 | ||||
-rw-r--r-- | src/validate.c | 2139 | ||||
-rw-r--r-- | src/world.c | 10 |
10 files changed, 1760 insertions, 646 deletions
@@ -81,12 +81,12 @@ check_version(const SerdIter* const iter) } SerdIter* -serd_iter_new(const SerdModel* model, - ZixBTreeIter* cur, - const SerdQuad pat, - SerdOrder order, - SearchMode mode, - int n_prefix) +serd_iter_new(const SerdModel* model, + ZixBTreeIter* cur, + const SerdQuad pat, + SerdStatementOrder order, + SearchMode mode, + int n_prefix) { SerdIter* iter = (SerdIter*)calloc(1, sizeof(SerdIter)); iter->model = model; @@ -25,21 +25,21 @@ #include <stdbool.h> #include <stdint.h> -/** Triple ordering */ -typedef enum { - SPO, ///< Subject, Predicate, Object - SOP, ///< Subject, Object, Predicate - OPS, ///< Object, Predicate, Subject - OSP, ///< Object, Subject, Predicate - PSO, ///< Predicate, Subject, Object - POS, ///< Predicate, Object, Subject - GSPO, ///< Graph, Subject, Predicate, Object - GSOP, ///< Graph, Subject, Object, Predicate - GOPS, ///< Graph, Object, Predicate, Subject - GOSP, ///< Graph, Object, Subject, Predicate - GPSO, ///< Graph, Predicate, Subject, Object - GPOS ///< Graph, Predicate, Object, Subject -} SerdOrder; +/* /\** Triple ordering *\/ */ +/* typedef enum { */ +/* SPO, ///< Subject, Predicate, Object */ +/* SOP, ///< Subject, Object, Predicate */ +/* OPS, ///< Object, Predicate, Subject */ +/* OSP, ///< Object, Subject, Predicate */ +/* PSO, ///< Predicate, Subject, Object */ +/* POS, ///< Predicate, Object, Subject */ +/* GSPO, ///< Graph, Subject, Predicate, Object */ +/* GSOP, ///< Graph, Subject, Object, Predicate */ +/* GOPS, ///< Graph, Object, Predicate, Subject */ +/* GOSP, ///< Graph, Object, Subject, Predicate */ +/* GPSO, ///< Graph, Predicate, Subject, Object */ +/* GPOS ///< Graph, Predicate, Object, Subject */ +/* } SerdOrder; */ /** Mode for searching or iteration */ typedef enum { @@ -50,13 +50,13 @@ typedef enum { } SearchMode; struct SerdIterImpl { - const SerdModel* model; ///< Model being iterated over - uint64_t version; ///< Model version when iterator was created - ZixBTreeIter* cur; ///< Current DB cursor - SerdQuad pat; ///< Pattern (in ordering order) - SerdOrder order; ///< Store order (which index) - SearchMode mode; ///< Iteration mode - int n_prefix; ///< Prefix for RANGE and FILTER_RANGE + const SerdModel* model; ///< Model being iterated over + uint64_t version; ///< Model version when iterator was created + ZixBTreeIter* cur; ///< Current DB cursor + SerdQuad pat; ///< Pattern (in ordering order) + SerdStatementOrder order; ///< Store order (which index) + SearchMode mode; ///< Iteration mode + int n_prefix; ///< Prefix for RANGE and FILTER_RANGE }; #define NUM_ORDERS 12 @@ -82,12 +82,12 @@ static const int orderings[NUM_ORDERS][TUP_LEN] = { }; SerdIter* -serd_iter_new(const SerdModel* model, - ZixBTreeIter* cur, - const SerdQuad pat, - SerdOrder order, - SearchMode mode, - int n_prefix); +serd_iter_new(const SerdModel* model, + ZixBTreeIter* cur, + const SerdQuad pat, + SerdStatementOrder order, + SearchMode mode, + int n_prefix); bool serd_iter_scan_next(SerdIter* iter); diff --git a/src/macros.h b/src/macros.h index ac5af149..af232cc5 100644 --- a/src/macros.h +++ b/src/macros.h @@ -17,6 +17,7 @@ #ifndef SERD_MACROS_H #define SERD_MACROS_H +#define MAX(x, y) ((x) < (y) ? (y) : (x)) #define MIN(x, y) ((x) < (y) ? (x) : (y)) #endif // SERD_MACROS_H diff --git a/src/model.c b/src/model.c index f3e56971..f06a5e82 100644 --- a/src/model.c +++ b/src/model.c @@ -31,8 +31,10 @@ #include <stddef.h> #include <stdlib.h> -#define DEFAULT_ORDER SPO -#define DEFAULT_GRAPH_ORDER GSPO +#define DEFAULT_ORDER SERD_ORDER_SPO +#define DEFAULT_GRAPH_ORDER SERD_ORDER_GSPO + +static const SerdQuad wildcard_pattern = {0, 0, 0, 0}; /** Compare quads lexicographically, ignoring graph. @@ -81,13 +83,13 @@ serd_quad_compare(const void* x, const void* y, void* user_data) corresponding order with a G prepended (so G will be the MSN). */ static inline bool -serd_model_has_index(const SerdModel* model, - SerdOrder* order, - int* n_prefix, - bool graphs) +serd_model_has_index(const SerdModel* model, + SerdStatementOrder* order, + int* n_prefix, + bool graphs) { if (graphs) { - *order = (SerdOrder)(*order + GSPO); + *order = (SerdStatementOrder)(*order + SERD_ORDER_GSPO); *n_prefix += 1; } @@ -101,7 +103,7 @@ serd_model_has_index(const SerdModel* model, @param n_prefix Set to the length of the range prefix (for `mode` == RANGE and `mode` == FILTER_RANGE) */ -static SerdOrder +static SerdStatementOrder serd_model_best_index(const SerdModel* model, const SerdQuad pat, SearchMode* mode, @@ -113,7 +115,7 @@ serd_model_best_index(const SerdModel* model, ((pat[0] ? 1u : 0u) * 0x100 + (pat[1] ? 1u : 0u) * 0x010 + (pat[2] ? 1u : 0u) * 0x001); - SerdOrder good[2] = {(SerdOrder)-1, (SerdOrder)-1}; + SerdStatementOrder good[2] = {(SerdStatementOrder)-1, (SerdStatementOrder)-1}; #define PAT_CASE(sig, m, g0, g1, np) \ case sig: \ @@ -137,12 +139,12 @@ serd_model_best_index(const SerdModel* model, *n_prefix = graph_search ? 4 : 3; return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER; - PAT_CASE(0x001, RANGE, OPS, OSP, 1); - PAT_CASE(0x010, RANGE, POS, PSO, 1); - PAT_CASE(0x011, RANGE, OPS, POS, 2); - PAT_CASE(0x100, RANGE, SPO, SOP, 1); - PAT_CASE(0x101, RANGE, SOP, OSP, 2); - PAT_CASE(0x110, RANGE, SPO, PSO, 2); + PAT_CASE(0x001, RANGE, SERD_ORDER_OPS, SERD_ORDER_OSP, 1); + PAT_CASE(0x010, RANGE, SERD_ORDER_POS, SERD_ORDER_PSO, 1); + PAT_CASE(0x011, RANGE, SERD_ORDER_OPS, SERD_ORDER_POS, 2); + PAT_CASE(0x100, RANGE, SERD_ORDER_SPO, SERD_ORDER_SOP, 1); + PAT_CASE(0x101, RANGE, SERD_ORDER_SOP, SERD_ORDER_OSP, 2); + PAT_CASE(0x110, RANGE, SERD_ORDER_SPO, SERD_ORDER_PSO, 2); default: break; } @@ -160,8 +162,8 @@ serd_model_best_index(const SerdModel* model, // Not so good orderings that require filtering, but can // still be constrained to a range switch (sig) { - PAT_CASE(0x011, FILTER_RANGE, OSP, PSO, 1); - PAT_CASE(0x101, FILTER_RANGE, SPO, OPS, 1); + PAT_CASE(0x011, FILTER_RANGE, SERD_ORDER_OSP, SERD_ORDER_PSO, 1); + PAT_CASE(0x101, FILTER_RANGE, SERD_ORDER_SPO, SERD_ORDER_OPS, 1); // SPO is always present, so 0x110 is never reached here default: break; @@ -212,9 +214,11 @@ serd_model_new(SerdWorld* world, SerdModelFlags flags) } // Create end iterator - const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; - ZixBTreeIter* cur = zix_btree_end(model->indices[order]); - const SerdQuad pat = {0, 0, 0, 0}; + const SerdStatementOrder order = + model->indices[SERD_ORDER_GSPO] ? SERD_ORDER_GSPO : SERD_ORDER_SPO; + + ZixBTreeIter* cur = zix_btree_end(model->indices[order]); + const SerdQuad pat = {0, 0, 0, 0}; model->end = serd_iter_new(model, cur, pat, order, ALL, 0); @@ -230,7 +234,7 @@ serd_model_copy(const SerdModel* model) SerdModel* copy = serd_model_new(model->world, model->flags); - SerdRange* all = serd_model_all(model); + SerdRange* all = serd_model_all(model, SERD_ORDER_SPO); serd_model_add_range(copy, all); serd_range_free(all); @@ -250,8 +254,8 @@ serd_model_equals(const SerdModel* a, const SerdModel* b) return false; } - SerdRange* ra = serd_model_all(a); - SerdRange* rb = serd_model_all(b); + SerdRange* ra = serd_model_all(a, SERD_ORDER_SPO); + SerdRange* rb = serd_model_all(b, SERD_ORDER_SPO); bool result = true; while (!serd_range_empty(ra) && !serd_range_empty(rb)) { @@ -333,7 +337,8 @@ serd_model_flags(const SerdModel* model) size_t serd_model_size(const SerdModel* model) { - const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; + const SerdStatementOrder order = + model->indices[SERD_ORDER_GSPO] ? SERD_ORDER_GSPO : SERD_ORDER_SPO; return zix_btree_size(model->indices[order]); } @@ -343,6 +348,34 @@ serd_model_empty(const SerdModel* model) return serd_model_size(model) == 0; } +// FIXME : expose + +static SerdIter* +serd_model_begin_ordered(const SerdModel* model, const SerdStatementOrder order) +{ + return model->indices[order] + ? serd_iter_new(model, + zix_btree_begin(model->indices[order]), + wildcard_pattern, + order, + ALL, + 0) + : NULL; +} + +static SerdIter* +serd_model_end_ordered(const SerdModel* model, const SerdStatementOrder order) +{ + return model->indices[order] + ? serd_iter_new(model, + zix_btree_end(model->indices[order]), + wildcard_pattern, + order, + ALL, + 0) + : NULL; +} + SerdIter* serd_model_begin(const SerdModel* model) { @@ -350,9 +383,10 @@ serd_model_begin(const SerdModel* model) return serd_iter_copy(serd_model_end(model)); } - const SerdOrder order = model->indices[GSPO] ? GSPO : SPO; - ZixBTreeIter* cur = zix_btree_begin(model->indices[order]); - const SerdQuad pat = {0, 0, 0, 0}; + const SerdStatementOrder order = + model->indices[SERD_ORDER_GSPO] ? SERD_ORDER_GSPO : SERD_ORDER_SPO; + ZixBTreeIter* cur = zix_btree_begin(model->indices[order]); + const SerdQuad pat = {0, 0, 0, 0}; return serd_iter_new(model, cur, pat, order, ALL, 0); } @@ -363,10 +397,17 @@ serd_model_end(const SerdModel* model) } SerdRange* -serd_model_all(const SerdModel* model) +serd_model_all(const SerdModel* model, const SerdStatementOrder order) { - return serd_range_new(serd_model_begin(model), - serd_iter_copy(serd_model_end(model))); + const SerdStatementOrder real_order = + (order >= SERD_ORDER_GSPO && !(model->flags & SERD_INDEX_GRAPHS)) + ? order - SERD_ORDER_GSPO + : order; + + return model->indices[real_order] + ? serd_range_new(serd_model_begin_ordered(model, real_order), + serd_model_end_ordered(model, real_order)) + : NULL; } SerdIter* @@ -381,9 +422,9 @@ serd_model_find(const SerdModel* model, return serd_model_begin(model); } - SearchMode mode = ALL; - int n_prefix = 0; - const SerdOrder index_order = + SearchMode mode = ALL; + int n_prefix = 0; + const SerdStatementOrder index_order = serd_model_best_index(model, pat, &mode, &n_prefix); ZixBTree* const db = model->indices[index_order]; @@ -560,7 +601,7 @@ serd_model_add_internal(SerdModel* model, if (model->indices[i]) { if (!zix_btree_insert(model->indices[i], statement)) { added = true; - } else if (i == GSPO) { + } else if (i == SERD_ORDER_GSPO) { break; // Statement already indexed } } @@ -631,7 +672,7 @@ serd_model_erase(SerdModel* model, SerdIter* iter) const SerdStatement* statement = serd_iter_get(iter); SerdStatement* removed = NULL; - for (int i = SPO; i <= GPOS; ++i) { + for (int i = SERD_ORDER_SPO; i <= SERD_ORDER_GPOS; ++i) { if (model->indices[i]) { zix_btree_remove(model->indices[i], statement, @@ -727,7 +727,10 @@ resolve_IRIREF(SerdReader* const reader, // Resolve relative URI reference to a full URI uri = serd_resolve_uri(uri, serd_env_base_uri_view(reader->env)); if (!uri.scheme.len) { - return SERD_ERR_BAD_URI; + return r_err(reader, + SERD_ERR_BAD_SYNTAX, + "failed to resolve relative URI reference <%s>\n", + serd_node_string(dest)); } // Push a new temporary node for constructing the resolved URI diff --git a/src/namespaces.h b/src/namespaces.h index e98eb5e1..5e2aa8c7 100644 --- a/src/namespaces.h +++ b/src/namespaces.h @@ -17,7 +17,9 @@ #ifndef SERD_NAMESPACES_H #define SERD_NAMESPACES_H -#define NS_XSD "http://www.w3.org/2001/XMLSchema#" +#define NS_OWL "http://www.w3.org/2002/07/owl#" #define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define NS_RDFS "http://www.w3.org/2000/01/rdf-schema#" +#define NS_XSD "http://www.w3.org/2001/XMLSchema#" #endif // SERD_NAMESPACES_H diff --git a/src/serdi.c b/src/serdi.c index b0f96f6c..6e3be8b0 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -64,7 +64,8 @@ print_usage(const char* name, bool error) fprintf(os, "Use - for INPUT to read from standard input.\n\n"); fprintf(os, " -C Convert literals to canonical form.\n"); fprintf(os, " -I BASE_URI Input base URI.\n"); - fprintf(os, " -V Validate inputs.\n"); + fprintf(os, " -V CHECKS Validate with checks matching CHECKS.\n"); + fprintf(os, " -X CHECKS Exclude validation checks matching CHECKS.\n"); fprintf(os, " -a Write ASCII output if possible.\n"); fprintf(os, " -b Fast bulk output for large serialisations.\n"); fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); @@ -75,15 +76,17 @@ print_usage(const char* name, bool error) fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); fprintf(os, " -k BYTES Parser stack size.\n"); fprintf(os, " -l Lax (non-strict) parsing.\n"); - fprintf(os, " -m Build and serialise a model (no streaming).\n"); - fprintf(os, " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"); + fprintf(os, " -m Build a model in memory before writing.\n"); + fprintf(os, " -o SYNTAX Output syntax: empty/turtle/ntriples/nquads.\n"); fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); fprintf(os, " -q Suppress all output except data.\n"); fprintf(os, " -r ROOT_URI Keep relative URIs within ROOT_URI.\n"); - fprintf(os, " -s INPUT Parse INPUT as string.\n"); + fprintf(os, " -s STRING Parse STRING as input.\n"); fprintf(os, " -t Write terser output without newlines.\n"); fprintf(os, " -v Display version information and exit.\n"); + fprintf(os, " -w FILENAME Write output to FILENAME instead of stdout.\n"); fprintf(os, " -x Support parsing variable nodes like `?x'.\n"); + fprintf(os, "\nSee serdi(1) for more detailed information: man serdi\n"); return error ? 1 : 0; } @@ -235,7 +238,18 @@ main(int argc, char** argv) base = serd_new_uri(SERD_MEASURE_STRING(argv[a])); } else if (argv[a][1] == 'V') { + if (++a == argc) { + return missing_arg(argv[0], 'V'); + } + + // Just enable validation and skip the pattern, checks are parsed below validate = use_model = true; + } else if (argv[a][1] == 'X') { + if (++a == argc) { + return missing_arg(argv[0], 'X'); + } + + // Checks are parsed below } else if (argv[a][1] == 'a') { writer_flags |= SERD_WRITE_ASCII; } else if (argv[a][1] == 'b') { @@ -390,7 +404,8 @@ main(int argc, char** argv) if (use_model) { const SerdModelFlags flags = SERD_INDEX_SPO | (input_has_graphs ? SERD_INDEX_GRAPHS : 0u) | - (no_inline ? 0u : SERD_INDEX_OPS) | (validate ? SERD_STORE_CURSORS : 0u); + (no_inline ? 0u : SERD_INDEX_OPS) | + (validate ? (SERD_STORE_CURSORS | SERD_INDEX_POS) : 0u); model = serd_model_new(world, flags); inserter = serd_inserter_new(model, env, NULL); @@ -453,8 +468,10 @@ main(int argc, char** argv) for (int i = 0; i < n_inputs; ++i) { if (!base) { - SerdNode* file_uri = - serd_new_file_uri(SERD_MEASURE_STRING(inputs[i]), SERD_EMPTY_STRING()); + SerdNode* file_uri = serd_new_real_file_uri(inputs[i], NULL); + /* SERD_MEASURE_STRING(inputs[i]), SERD_EMPTY_STRING()); */ + + /* fprintf(stderr, "SET BASE URI <%s>\n", serd_node_string(file_uri)); */ serd_env_set_base_uri(env, serd_node_string_view(file_uri)); serd_node_free(file_uri); @@ -473,18 +490,21 @@ main(int argc, char** argv) inputs[i], n_inputs > 1 ? prefix : add_prefix, bulk_read))) { + /* serd_world_logf(world, */ + /* "serd", */ + /* SERD_LOG_LEVEL_ERR, */ + /* 0, */ + /* NULL, */ + /* "error parsing %s\n", */ + /* inputs[i]); */ break; } } free(prefix); - if (!st && validate) { - st = serd_validate(model); - } - - if (st <= SERD_FAILURE && use_model) { + if (!st && output_syntax != SERD_SYNTAX_EMPTY && use_model) { const SerdSink* writer_sink = serd_writer_sink(writer); - SerdRange* range = serd_model_all(model); + SerdRange* range = serd_model_all(model, SERD_ORDER_GSPO); serd_env_write_prefixes(env, writer_sink); @@ -492,6 +512,22 @@ main(int argc, char** argv) serd_range_free(range); } + if (!st && validate) { + SerdValidator* const validator = serd_validator_new(world); + + for (int i = 1; i < argc && argv[i][0] == '-'; ++i) { + if (argv[i][1] == 'V') { + serd_validator_enable_checks(validator, argv[++i]); + } else if (argv[i][1] == 'X') { + serd_validator_disable_checks(validator, argv[++i]); + } + } + + st = serd_validate_model(validator, model, NULL); + + serd_validator_free(validator); + } + serd_sink_free(canon); serd_sink_free(filter); serd_sink_free(inserter); @@ -508,5 +544,5 @@ main(int argc, char** argv) serd_byte_sink_free(byte_sink); - return (st > SERD_FAILURE) ? 1 : 0; + return st <= SERD_FAILURE ? 0 : (int)st; } diff --git a/src/string.c b/src/string.c index d9a0140c..f91d6e52 100644 --- a/src/string.c +++ b/src/string.c @@ -41,7 +41,7 @@ serd_strerror(SerdStatus status) case SERD_FAILURE: return "Non-fatal failure"; case SERD_ERR_UNKNOWN: - return "Unknown error"; + break; case SERD_ERR_BAD_SYNTAX: return "Invalid syntax"; case SERD_ERR_BAD_ARG: @@ -70,8 +70,6 @@ serd_strerror(SerdStatus status) return "Invalid or unresolved URI"; case SERD_ERR_INVALID: return "Invalid data"; - default: - break; } return "Unknown error"; // never reached } diff --git a/src/validate.c b/src/validate.c index 46291b64..8c5ff073 100644 --- a/src/validate.c +++ b/src/validate.c @@ -14,9 +14,10 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include "serd_config.h" // IWYU pragma: keep - +#include "macros.h" #include "model.h" +#include "namespaces.h" +#include "node.h" #include "rerex/rerex.h" #include "serd/serd.h" #include "world.h" @@ -27,121 +28,235 @@ #include <stdlib.h> #include <string.h> +#include <assert.h> + +/* #define NS_checks "http://drobilla.net/sw/serd/checks#" */ #define NS_owl "http://www.w3.org/2002/07/owl#" #define NS_rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" #define NS_rdfs "http://www.w3.org/2000/01/rdf-schema#" #define NS_xsd "http://www.w3.org/2001/XMLSchema#" -#define VERRORF(ctx, statement, fmt, ...) \ - report(ctx, statement, SERD_LOG_LEVEL_ERR, fmt, __VA_ARGS__) - -#define VERROR(ctx, statement, fmt) \ - report(ctx, statement, SERD_LOG_LEVEL_ERR, fmt) - -#define VWARNF(ctx, statement, fmt, ...) \ - report(ctx, statement, SERD_LOG_LEVEL_WARNING, fmt, __VA_ARGS__) - -#define VNOTEF(ctx, statement, fmt, ...) \ - report(ctx, statement, SERD_LOG_LEVEL_NOTICE, fmt, __VA_ARGS__) - -#define VNOTE(ctx, statement, fmt) \ - report(ctx, statement, SERD_LOG_LEVEL_NOTICE, fmt) - #define SERD_FOREACH(name, range) \ for (const SerdStatement*(name) = NULL; \ !serd_range_empty(range) && ((name) = serd_range_front(range)); \ serd_range_next(range)) +#define SERD_FOREACH_NODE(field, name, range) \ + for (const SerdNode*(name) = NULL; \ + !serd_range_empty(range) && \ + (name = serd_statement_node(serd_range_front(range), field)); \ + serd_range_next(range)) + +#define NODE_FMT "%s%s%s" + +#define NODE_ARGS(node) \ + open_quote(node), serd_node_string(node), close_quote(node) + +#define N_CHECKS 32 + +static const char* check_names[N_CHECKS] = { + "allValuesFrom", // + "anyUri", // + "cardinalityEqual", // + "cardinalityMax", // + "cardinalityMin", // + "classCycle", // + "classLabel", // + "classType", // + "datatypeProperty", // + "datatypeType", // + "deprecatedClass", // + "deprecatedProperty", // + "functionalProperty", // + "instanceLiteral", // + "instanceType", // + "inverseFunctionalProperty", // + "literalInstance", // + "literalMaxExclusive", // + "literalMaxInclusive", // + "literalMinExclusive", // + "literalMinInclusive", // + "literalPattern", // + "literalRestriction", // + "literalValue", // + "objectProperty", // + "plainLiteralDatatype", // + "predicateType", // + "propertyCycle", // + "propertyDomain", // + "propertyLabel", // + "propertyRange", // + "someValuesFrom", // +}; + typedef unsigned long Count; typedef struct { - SerdNode* owl_Class; - SerdNode* owl_DatatypeProperty; - SerdNode* owl_FunctionalProperty; - SerdNode* owl_InverseFunctionalProperty; - SerdNode* owl_ObjectProperty; - SerdNode* owl_Restriction; - SerdNode* owl_Thing; - SerdNode* owl_allValuesFrom; - SerdNode* owl_cardinality; - SerdNode* owl_equivalentClass; - SerdNode* owl_maxCardinality; - SerdNode* owl_minCardinality; - SerdNode* owl_onDatatype; - SerdNode* owl_onProperty; - SerdNode* owl_someValuesFrom; - SerdNode* owl_withRestrictions; - SerdNode* rdf_PlainLiteral; - SerdNode* rdf_Property; - SerdNode* rdf_first; - SerdNode* rdf_rest; - SerdNode* rdf_type; - SerdNode* rdfs_Class; - SerdNode* rdfs_Datatype; - SerdNode* rdfs_Literal; - SerdNode* rdfs_Resource; - SerdNode* rdfs_domain; - SerdNode* rdfs_label; - SerdNode* rdfs_range; - SerdNode* rdfs_subClassOf; - SerdNode* xsd_anyURI; - SerdNode* xsd_float; - SerdNode* xsd_decimal; - SerdNode* xsd_double; - SerdNode* xsd_maxExclusive; - SerdNode* xsd_maxInclusive; - SerdNode* xsd_minExclusive; - SerdNode* xsd_minInclusive; - SerdNode* xsd_pattern; - SerdNode* xsd_string; - SerdNode* sentinel; + const SerdNode* owl_Class; + const SerdNode* owl_DatatypeProperty; + const SerdNode* owl_deprecated; + const SerdNode* owl_DeprecatedClass; + const SerdNode* owl_DeprecatedProperty; + const SerdNode* owl_FunctionalProperty; + const SerdNode* owl_InverseFunctionalProperty; + const SerdNode* owl_ObjectProperty; + const SerdNode* owl_Restriction; + const SerdNode* owl_Thing; + const SerdNode* owl_allValuesFrom; + const SerdNode* owl_cardinality; + const SerdNode* owl_equivalentClass; + const SerdNode* owl_maxCardinality; + const SerdNode* owl_minCardinality; + const SerdNode* owl_onDatatype; + const SerdNode* owl_onProperty; + const SerdNode* owl_someValuesFrom; + const SerdNode* owl_withRestrictions; + const SerdNode* rdf_PlainLiteral; + const SerdNode* rdf_Property; + const SerdNode* rdf_first; + const SerdNode* rdf_rest; + const SerdNode* rdf_type; + const SerdNode* rdfs_Class; + const SerdNode* rdfs_Datatype; + const SerdNode* rdfs_Literal; + const SerdNode* rdfs_Resource; + const SerdNode* rdfs_domain; + const SerdNode* rdfs_label; + const SerdNode* rdfs_range; + const SerdNode* rdfs_subClassOf; + const SerdNode* rdfs_subPropertyOf; + const SerdNode* xsd_anyURI; + const SerdNode* xsd_maxExclusive; + const SerdNode* xsd_maxInclusive; + const SerdNode* xsd_minExclusive; + const SerdNode* xsd_minInclusive; + const SerdNode* xsd_pattern; + const SerdNode* sentinel; } URIs; -typedef struct { - URIs uris; +struct SerdValidatorImpl { + const SerdWorld* world; const SerdModel* model; + const SerdNode* graph; + const SerdNode* true_node; + URIs uris; + uint32_t checks; unsigned n_errors; - unsigned n_restrictions; - bool quiet; -} ValidationContext; + unsigned n_checks; + bool suppressed; +}; -static int -check_class_restriction(ValidationContext* ctx, +typedef struct { + const char* name; +} Check; + +static SerdStatus +check_class_restriction(SerdValidator* ctx, + const SerdNode* root_klass, const SerdNode* restriction, const SerdStatement* statement, const SerdNode* instance); -SERD_LOG_FUNC(4, 5) -static int -report(ValidationContext* ctx, - const SerdStatement* statement, - const SerdLogLevel level, - const char* fmt, - ...) +static const SerdNode* +string_node(const SerdValidator* const ctx, const SerdNode* const node) +{ + const SerdNode* const label = + serd_model_get(ctx->model, node, ctx->uris.rdfs_label, NULL, NULL); + + return label ? label : node; +} + +static const char* +open_quote(const SerdNode* const node) +{ + return (serd_node_type(node) == SERD_LITERAL) ? "\"" + : (serd_node_type(node) == SERD_URI) ? "<" + : (serd_node_type(node) == SERD_BLANK) ? "_:" + : ""; +} + +static const char* +close_quote(const SerdNode* const node) { - if (ctx->quiet) { - return 0; + return (serd_node_type(node) == SERD_LITERAL) ? "\"" + : (serd_node_type(node) == SERD_URI) ? ">" + : ""; +} + +SERD_LOG_FUNC(5, 0) +static void +vreportf(SerdValidator* const ctx, + const SerdValidatorCheck check, + const SerdLogLevel level, + const SerdStatement* const statement, + const char* const fmt, + va_list args) +{ + const char* file = NULL; + char line[24] = {0}; + char col[24] = {0}; + char status_string[12] = {0}; + + snprintf(status_string, sizeof(status_string), "%d", SERD_ERR_INVALID); + + const SerdCursor* const cursor = + statement ? serd_statement_cursor(statement) : NULL; + + if (cursor) { + file = serd_node_string(serd_cursor_name(cursor)); + + snprintf(line, sizeof(line), "%u", serd_cursor_line(cursor)); + snprintf(col, sizeof(col), "%u", serd_cursor_column(cursor)); } - va_list args; - va_start(args, fmt); - serd_world_vlogf_internal(ctx->model->world, - SERD_ERR_INVALID, - level, - serd_statement_cursor(statement), - fmt, - args); - va_end(args); + const SerdLogField fields[] = {{"SERD_STATUS", status_string}, + {"SERD_CHECK", check_names[check]}, + {"SERD_FILE", file}, + {"SERD_LINE", line}, + {"SERD_COL", col}}; - ++ctx->n_errors; - return 1; + serd_world_vlogf( + ctx->model->world, "serd", level, cursor ? 5 : 2, fields, fmt, args); } -static bool -check(ValidationContext* ctx, const bool value) +SERD_LOG_FUNC(5, 6) +static SerdStatus +report_check(SerdValidator* const ctx, + const SerdStatement* const statement, + const SerdValidatorCheck check, + const bool condition, + const char* fmt, + ...) { - ++ctx->n_restrictions; - return value; + if (!ctx->suppressed && (ctx->checks & (1u << check))) { + ctx->n_checks += 1; + ctx->n_errors += condition ? 0 : 1; + + if (!condition) { + va_list args; + va_start(args, fmt); + vreportf(ctx, check, SERD_LOG_LEVEL_ERR, statement, fmt, args); + va_end(args); + } + } + + return condition ? SERD_SUCCESS : SERD_ERR_INVALID; +} + +SERD_LOG_FUNC(4, 5) +static void +log_note(SerdValidator* const ctx, + const SerdStatement* const statement, + const SerdValidatorCheck check, + const char* fmt, + ...) +{ + if (!ctx->suppressed) { + va_list args; + va_start(args, fmt); + vreportf(ctx, check, SERD_LOG_LEVEL_NOTICE, statement, fmt, args); + va_end(args); + } } /* @@ -151,10 +266,10 @@ check(ValidationContext* ctx, const bool value) following `pred` arcs starting from child. */ static bool -is_descendant(ValidationContext* ctx, - const SerdNode* child, - const SerdNode* parent, - const SerdNode* pred) +is_descendant(SerdValidator* const ctx, + const SerdNode* const child, + const SerdNode* const parent, + const SerdNode* const pred) { if (serd_node_equals(child, parent) || serd_model_ask( @@ -162,206 +277,310 @@ is_descendant(ValidationContext* ctx, return true; } - SerdRange* i = serd_model_range(ctx->model, child, pred, NULL, NULL); - SERD_FOREACH (s, i) { - const SerdNode* o = serd_statement_object(s); + SerdRange* const i = serd_model_range(ctx->model, child, pred, NULL, NULL); + SERD_FOREACH_NODE (SERD_OBJECT, o, i) { if (!serd_node_equals(child, o) && is_descendant(ctx, o, parent, pred)) { serd_range_free(i); return true; } } - serd_range_free(i); + serd_range_free(i); return false; } -// Return true iff `child` is a subclass of `parent` -static bool -is_subclass(ValidationContext* ctx, - const SerdNode* child, - const SerdNode* parent) +// Return true iff `klass` is a subclass of `super` +static inline bool +is_subclass(SerdValidator* const ctx, + const SerdNode* const klass, + const SerdNode* const super) { - return is_descendant(ctx, child, parent, ctx->uris.rdfs_subClassOf); + return is_descendant(ctx, klass, super, ctx->uris.rdfs_subClassOf); } -// Return true iff `child` is a sub-datatype of `parent` -static bool -is_subdatatype(ValidationContext* ctx, - const SerdNode* child, - const SerdNode* parent) +static inline void +update_status(SerdStatus* const old, const SerdStatus next) { - return is_descendant(ctx, child, parent, ctx->uris.owl_onDatatype); + *old = next > *old ? next : *old; } -static bool -regex_match(ValidationContext* const ctx, - const SerdStatement* const pattern_statement, - const char* const regex, - const char* const str) +static inline SerdStatus +merge_status(const SerdStatus a, const SerdStatus b) { - RerexPattern* re = NULL; - size_t end = 0; - const RerexStatus st = rerex_compile(regex, &end, &re); - if (st) { - VERRORF(ctx, - pattern_statement, - "Error in pattern \"%s\" at offset %zu (%s)\n", - regex, - end, - rerex_strerror(st)); - return false; + return a > b ? a : b; +} + +static int +bound_cmp(SerdValidator* ctx, + const SerdStatement* literal_statement, + const SerdNode* literal, + const SerdNode* type, + const SerdStatement* bound_statement, + const SerdNode* bound) +{ + const ExessDatatype value_type = + exess_datatype_from_uri(serd_node_string(type)); + + if (value_type != EXESS_NOTHING) { + const ExessVariant bound_value = serd_node_get_value_as(bound, value_type); + + if (bound_value.datatype == EXESS_NOTHING) { + return !!serd_world_logf_internal( + ctx->model->world, + SERD_ERR_INVALID, + SERD_LOG_LEVEL_ERR, + serd_statement_cursor(bound_statement), + "Failed to parse bound literal \"%s\" (%s)", + serd_node_string(bound), + exess_strerror(bound_value.value.as_status)); + } + + const ExessVariant literal_value = + serd_node_get_value_as(literal, value_type); + + if (literal_value.datatype == EXESS_NOTHING) { + return !!serd_world_logf_internal( + ctx->model->world, + SERD_ERR_INVALID, + SERD_LOG_LEVEL_ERR, + serd_statement_cursor(literal_statement), + "Failed to parse literal \"%s\" (%s)", + serd_node_string(literal), + exess_strerror(literal_value.value.as_status)); + } + + return exess_compare(literal_value, bound_value); } - RerexMatcher* matcher = rerex_new_matcher(re); - const bool ret = rerex_match(matcher, str); + return strcmp(serd_node_string(literal), serd_node_string(bound)); +} - rerex_free_matcher(matcher); - rerex_free_pattern(re); +static inline bool +less(const int cmp) +{ + return cmp < 0; +} - return ret; +static inline bool +less_equal(const int cmp) +{ + return cmp <= 0; } -static int -bound_cmp(ValidationContext* ctx, - const SerdNode* literal, - const SerdNode* type, - const SerdNode* bound) +static inline bool +greater(const int cmp) { - const bool is_numeric = (is_subdatatype(ctx, type, ctx->uris.xsd_decimal) || - is_subdatatype(ctx, type, ctx->uris.xsd_double)); + return cmp > 0; +} - if (is_numeric) { - const double fbound = serd_get_double(bound); - const double fliteral = serd_get_double(literal); +static inline bool +greater_equal(const int cmp) +{ + return cmp >= 0; +} - return ((fliteral < fbound) ? -1 : (fliteral > fbound) ? 1 : 0); +static SerdStatus +check_bound(SerdValidator* const ctx, + const SerdValidatorCheck check, + const SerdStatement* const statement, + const SerdNode* const literal, + const SerdNode* const type, + const SerdNode* const restriction, + const SerdNode* const bound_property, + bool (*pred)(int), + const char* const message) +{ + SerdIter* const b = + serd_model_find(ctx->model, restriction, bound_property, 0, 0); + if (!b) { + return SERD_SUCCESS; } - return strcmp(serd_node_string(literal), serd_node_string(bound)); + const SerdNode* const bound = serd_statement_object(serd_iter_get(b)); + const int cmp = + bound_cmp(ctx, statement, literal, type, serd_iter_get(b), bound); + + serd_iter_free(b); + + return report_check(ctx, + statement, + check, + pred(cmp), + "Value \"%s\" %s \"%s\"", + serd_node_string(literal), + message, + serd_node_string(bound)); } -static bool -check_literal_restriction(ValidationContext* ctx, - const SerdStatement* statement, - const SerdNode* literal, - const SerdNode* type, - const SerdNode* restriction) +static RerexPattern* +parse_regex(const SerdWorld* const world, + const SerdStatement* const pattern_statement, + const char* const regex) { - const char* str = serd_node_string(literal); + const SerdCursor* const cursor = + pattern_statement ? serd_statement_cursor(pattern_statement) : NULL; - // Check xsd:pattern - const SerdStatement* pat_statement = serd_model_get_statement( - ctx->model, restriction, ctx->uris.xsd_pattern, 0, 0); - if (pat_statement) { - const SerdNode* pat_node = serd_statement_object(pat_statement); - const char* pat = serd_node_string(pat_node); - if (check(ctx, !regex_match(ctx, pat_statement, pat, str))) { - VERRORF(ctx, - statement, - "Value \"%s\" does not match pattern \"%s\"\n", - serd_node_string(literal), - pat); - return false; - } + RerexPattern* re = NULL; + size_t end = 0; + const RerexStatus st = rerex_compile(regex, &end, &re); + if (st) { + serd_world_logf_internal(world, + SERD_ERR_INVALID, + SERD_LOG_LEVEL_ERR, + cursor, + "Error in pattern \"%s\" at offset %zu (%s)", + regex, + end, + rerex_strerror(st)); + return NULL; } - // Check xsd:minInclusive - const SerdNode* lower = - serd_model_get(ctx->model, restriction, ctx->uris.xsd_minInclusive, 0, 0); - if (lower) { - if (check(ctx, bound_cmp(ctx, literal, type, lower) < 0)) { - VERRORF(ctx, - statement, - "Value \"%s\" < minimum \"%s\"\n", - serd_node_string(literal), - serd_node_string(lower)); - return false; - } - } + return re; +} - // Check xsd:maxInclusive - const SerdNode* upper = - serd_model_get(ctx->model, restriction, ctx->uris.xsd_maxInclusive, 0, 0); - if (upper) { - if (check(ctx, bound_cmp(ctx, literal, type, upper) > 0)) { - VERRORF(ctx, - statement, - "Value \"%s\" > than maximum \"%s\"\n", - serd_node_string(literal), - serd_node_string(upper)); - return false; - } +static bool +regex_match(SerdValidator* const ctx, + const SerdStatement* const pattern_statement, + const char* const regex, + const char* const str) +{ + RerexPattern* const re = parse_regex(ctx->world, pattern_statement, regex); + if (!re) { + return false; } - // Check xsd:minExclusive - const SerdNode* elower = - serd_model_get(ctx->model, restriction, ctx->uris.xsd_minExclusive, 0, 0); - if (elower) { - if (check(ctx, bound_cmp(ctx, literal, type, elower) <= 0)) { - VERRORF(ctx, - statement, - "Value \"%s\" <= exclusive minimum \"%s\"\n", - serd_node_string(literal), - serd_node_string(elower)); - return false; - } - } + RerexMatcher* const matcher = rerex_new_matcher(re); + const bool ret = rerex_match(matcher, str); - // Check xsd:maxExclusive - const SerdNode* eupper = - serd_model_get(ctx->model, restriction, ctx->uris.xsd_maxExclusive, 0, 0); - if (eupper) { - if (check(ctx, bound_cmp(ctx, literal, type, eupper) >= 0)) { - VERRORF(ctx, - statement, - "Value \"%s\" >= exclusive maximum \"%s\"\n", - serd_node_string(literal), - serd_node_string(eupper)); - return false; - } - ++ctx->n_restrictions; - } + rerex_free_matcher(matcher); + rerex_free_pattern(re); - return true; // Unknown restriction, be quietly tolerant + return ret; } -static bool -is_datatype(ValidationContext* ctx, const SerdNode* dtype) +static SerdStatus +check_literal_restriction(SerdValidator* const ctx, + const SerdStatement* const statement, + const SerdNode* const literal, + const SerdNode* const type, + const SerdNode* const restriction) { - SerdRange* t = - serd_model_range(ctx->model, dtype, ctx->uris.rdf_type, NULL, NULL); - SERD_FOREACH (s, t) { - const SerdNode* type = serd_statement_object(s); - if (is_subdatatype(ctx, type, ctx->uris.rdfs_Datatype)) { - serd_range_free(t); - return true; // Subdatatype of rdfs:Datatype - } + SerdStatus st = SERD_SUCCESS; + + // Check xsd:pattern + const SerdStatement* const pat_statement = serd_model_get_statement( + ctx->model, restriction, ctx->uris.xsd_pattern, 0, 0); + if (pat_statement) { + const char* const str = serd_node_string(literal); + const SerdNode* const pat_node = serd_statement_object(pat_statement); + const char* const pat = serd_node_string(pat_node); + + st = merge_status(st, + report_check(ctx, + statement, + SERD_CHECK_LITERAL_PATTERN, + regex_match(ctx, pat_statement, pat, str), + "Value \"%s\" doesn't match pattern \"%s\"", + serd_node_string(literal), + pat)); } - serd_range_free(t); - return false; + // Check inclusive/exclusive min and max + + typedef bool (*BoundCmpPredicate)(int); + + typedef struct { + SerdValidatorCheck check_id; + const SerdNode* restriction_property; + BoundCmpPredicate pred; + const char* const message; + } BoundCheck; + + const BoundCheck bound_checks[] = { + {SERD_CHECK_LITERAL_MIN_INCLUSIVE, + ctx->uris.xsd_minInclusive, + greater_equal, + "<"}, + {SERD_CHECK_LITERAL_MAX_INCLUSIVE, + ctx->uris.xsd_maxInclusive, + less_equal, + ">"}, + {SERD_CHECK_LITERAL_MIN_EXCLUSIVE, + ctx->uris.xsd_minExclusive, + greater, + "<="}, + {SERD_CHECK_LITERAL_MAX_EXCLUSIVE, ctx->uris.xsd_maxExclusive, less, ">="}, + }; + + for (size_t i = 0; i < sizeof(bound_checks) / sizeof(BoundCheck); ++i) { + st = merge_status(st, + check_bound(ctx, + bound_checks[i].check_id, + statement, + literal, + type, + restriction, + bound_checks[i].restriction_property, + bound_checks[i].pred, + bound_checks[i].message)); + } + + return st; } static bool -literal_is_valid(ValidationContext* ctx, - const SerdStatement* statement, - const SerdNode* literal, - const SerdNode* type) +literal_is_valid(SerdValidator* const ctx, + const SerdStatement* const statement, + const SerdNode* const literal, + const SerdNode* const type) { if (!type) { return true; } // Check that datatype is defined - const SerdNode* datatype = serd_node_datatype(literal); - if (datatype && !is_datatype(ctx, datatype)) { - VERRORF(ctx, - statement, - "Datatype <%s> is not defined\n", - serd_node_string(datatype)); + const SerdNode* const node_datatype = serd_node_datatype(literal); + if (node_datatype && report_check(ctx, + statement, + SERD_CHECK_DATATYPE_TYPE, + serd_model_ask(ctx->model, + node_datatype, + ctx->uris.rdf_type, + ctx->uris.rdfs_Datatype, + NULL), + "Undefined datatype <%s>", + serd_node_string(node_datatype))) { return false; } + const SerdNode* const type_string = string_node(ctx, type); + + const ExessDatatype value_type = + node_datatype ? exess_datatype_from_uri(serd_node_string(node_datatype)) + : EXESS_NOTHING; + + if (value_type != EXESS_NOTHING) { + /* Check if the literal parses correctly by measuring the canonical string. + This is better than trying to read a variant here, because it + automatically supports some unbounded datatypes like xsd:decimal and + xsd:base64Binary without needing to allocate space for the value. */ + + const ExessResult r = + exess_write_canonical(serd_node_string(literal), value_type, 0, NULL); + + if (report_check(ctx, + statement, + SERD_CHECK_LITERAL_VALUE, + r.status == EXESS_SUCCESS, + "Invalid xsd:%s literal \"%s\" (%s)", + serd_node_string(node_datatype) + sizeof(EXESS_XSD_URI) - + 1, + serd_node_string(literal), + exess_strerror(r.status))) { + return false; + } + } + // Find restrictions list const SerdNode* head = serd_model_get(ctx->model, type, ctx->uris.owl_withRestrictions, 0, 0); @@ -380,11 +599,15 @@ literal_is_valid(ValidationContext* ctx, break; } - const SerdNode* first = serd_statement_object(s_first); + const SerdNode* const first = serd_statement_object(s_first); // Check this restriction - if (!check_literal_restriction(ctx, statement, literal, type, first)) { - VNOTEF(ctx, s_first, "Restriction on <%s>\n", serd_node_string(type)); + if (check_literal_restriction(ctx, statement, literal, type, first)) { + log_note(ctx, + s_first, + SERD_CHECK_LITERAL_RESTRICTION, + "Restriction on datatype " NODE_FMT, + NODE_ARGS(type_string)); serd_iter_free(i_first); return false; } @@ -395,152 +618,205 @@ literal_is_valid(ValidationContext* ctx, } // Recurse up datatype hierarchy - const SerdNode* super = + const SerdNode* const super = serd_model_get(ctx->model, type, ctx->uris.owl_onDatatype, 0, 0); return super ? literal_is_valid(ctx, statement, literal, super) : true; } static bool -is_a(ValidationContext* ctx, const SerdNode* subject, const SerdNode* type) -{ - return serd_model_ask(ctx->model, subject, ctx->uris.rdf_type, type, 0); -} - -static bool -has_explicit_type(ValidationContext* ctx, - const SerdNode* node, - const SerdNode* klass) +is_a(SerdValidator* const ctx, + const SerdNode* const node, + const SerdNode* const type) { - if (is_a(ctx, node, klass)) { - return true; // Directly stated to be an instance + if (serd_model_ask(ctx->model, node, ctx->uris.rdf_type, type, 0)) { + return true; // Instance explicitly has this type } - SerdRange* t = + SerdRange* const node_types = serd_model_range(ctx->model, node, ctx->uris.rdf_type, NULL, NULL); - SERD_FOREACH (s, t) { - if (is_subclass(ctx, serd_statement_object(s), klass)) { - serd_range_free(t); - return true; // Explicit instance of a subclass + + SERD_FOREACH_NODE (SERD_OBJECT, node_type, node_types) { + if (is_subclass(ctx, node_type, type)) { + serd_range_free(node_types); + return true; // Instance explicitly has a subtype of this type } } - serd_range_free(t); + serd_range_free(node_types); return false; } -static bool -is_instance_of(ValidationContext* ctx, - const SerdNode* node, - const SerdNode* klass) +static SerdStatus +check_instance_type(SerdValidator* const ctx, + const SerdValidatorCheck check, + const SerdNode* const root_klass, + const SerdStatement* const statement, + const SerdNode* const instance, + const SerdNode* const klass) { - if (!serd_model_ask(ctx->model, node, NULL, NULL, NULL)) { - /* Nothing about this node known in the model at all, assume it is some - external resource we can't validate. */ - return true; + SerdStatus st = SERD_SUCCESS; + + // Any URI node is an xsd:anyURI + if (serd_node_equals(klass, ctx->uris.xsd_anyURI)) { + return report_check(ctx, + statement, + SERD_CHECK_ANY_URI, + serd_node_type(instance) == SERD_URI, + "Node " NODE_FMT " isn't a URI", + NODE_ARGS(instance)); } - if (serd_node_type(node) == SERD_BLANK) { - /* Be permissive for blank nodes and don't require explicit type - annotation, to avoid countless errors with things like lists. */ - return true; + // An instance can not be a rdfs:Literal or a rdfs:Datatype + if (report_check(ctx, + statement, + SERD_CHECK_INSTANCE_LITERAL, + !is_subclass(ctx, klass, ctx->uris.rdfs_Literal) && + !is_a(ctx, klass, ctx->uris.rdfs_Datatype), + "Instance " NODE_FMT " isn't a literal", + NODE_ARGS(instance))) { + return SERD_ERR_INVALID; } - return (has_explicit_type(ctx, node, klass) || - serd_node_equals(klass, ctx->uris.rdfs_Resource) || - serd_node_equals(klass, ctx->uris.owl_Thing)); -} - -static bool -check_instance_type(ValidationContext* ctx, - const SerdStatement* statement, - const SerdNode* node, - const SerdNode* klass) -{ - if (is_subclass(ctx, klass, ctx->uris.rdfs_Literal) || - is_a(ctx, klass, ctx->uris.rdfs_Datatype)) { - VERROR(ctx, statement, "Class instance found where literal expected\n"); - return false; + // Every instance is a rdfs:Resource and owl:Thing + if (serd_node_equals(klass, ctx->uris.rdfs_Resource) || + serd_node_equals(klass, ctx->uris.owl_Thing)) { + return SERD_SUCCESS; } - if (is_a(ctx, klass, ctx->uris.owl_Restriction)) { - if (check_class_restriction(ctx, klass, statement, node)) { - return false; + // If the class is a restriction, check it + if (serd_model_ask(ctx->model, + klass, + ctx->uris.rdf_type, + ctx->uris.owl_Restriction, + NULL)) { + if ((st = check_class_restriction( + ctx, root_klass, klass, statement, instance))) { + return st; } } - SerdRange* r = + SerdRange* const superclasses = serd_model_range(ctx->model, klass, ctx->uris.rdfs_subClassOf, NULL, NULL); - SERD_FOREACH (s, r) { - const SerdNode* super = serd_statement_object(s); - if (!serd_node_equals(super, klass) && - !check_instance_type(ctx, statement, node, super)) { - serd_range_free(r); - return false; + SERD_FOREACH_NODE (SERD_OBJECT, superclass, superclasses) { + const SerdNode* const klass_string = string_node(ctx, klass); + const SerdNode* const superclass_string = string_node(ctx, superclass); + if (!serd_node_equals(klass, superclass) && + check_instance_type( + ctx, check, klass, statement, instance, superclass)) { + if (serd_node_type(superclass) == SERD_URI || + serd_node_type(superclass) == SERD_CURIE) { + log_note(ctx, + serd_range_front(superclasses), + check, + "A " NODE_FMT " is a " NODE_FMT, + NODE_ARGS(klass_string), + NODE_ARGS(superclass_string)); + } + + serd_range_free(superclasses); + return SERD_ERR_INVALID; } } - serd_range_free(r); + serd_range_free(superclasses); - if (!is_instance_of(ctx, node, klass)) { - VERRORF(ctx, - statement, - "Node %s is not an instance of %s\n", - serd_node_string(node), - serd_node_string(klass)); - return false; + if (serd_model_ask(ctx->model, instance, ctx->uris.rdf_type, klass, NULL)) { + return SERD_SUCCESS; } - return true; + const SerdNode* const instance_string = string_node(ctx, instance); + const SerdNode* const klass_string = string_node(ctx, klass); + + if (!serd_model_ask(ctx->model, instance, NULL, NULL, NULL)) { + /* Nothing about this node known in the model at all, assume it is some + external resource we can't validate. */ + serd_world_logf_internal(ctx->model->world, + SERD_ERR_INVALID, + SERD_LOG_LEVEL_WARNING, + serd_statement_cursor(statement), + "Nothing known about " NODE_FMT + ", assuming it is a " NODE_FMT, + NODE_ARGS(instance_string), + NODE_ARGS(klass_string)); + + // FIXME: test + // return SERD_FAILURE; + /* return true; */ + return SERD_FAILURE; + } + + if (serd_node_type(instance) == SERD_BLANK) { + /* Be permissive for blank nodes and don't require explicit type + annotation, to avoid countless errors with things like lists. */ + // return SERD_FAILURE; + return SERD_SUCCESS; + } + + return report_check(ctx, + statement, + SERD_CHECK_INSTANCE_TYPE, + is_a(ctx, instance, klass), + "Instance " NODE_FMT " isn't a " NODE_FMT, + NODE_ARGS(instance_string), + NODE_ARGS(klass_string)); } -static bool -check_type(ValidationContext* ctx, - const SerdStatement* statement, - const SerdNode* node, - const SerdNode* type) +static SerdStatus +check_type(SerdValidator* ctx, + const SerdValidatorCheck check, + const SerdStatement* statement, + const SerdNode* node, + const SerdNode* type) { - if (serd_node_equals(type, ctx->uris.rdfs_Resource) || - serd_node_equals(type, ctx->uris.owl_Thing)) { - return true; // Trivially true for everything (more or less) + const SerdNode* const type_string = string_node(ctx, type); + + // Everything is an rdfs:Resource + if (serd_node_equals(type, ctx->uris.rdfs_Resource)) { + return SERD_SUCCESS; } - if (serd_node_type(node) == SERD_LITERAL) { + switch (serd_node_type(node)) { + case SERD_LITERAL: + // Every literal is an rdfs:Literal if (serd_node_equals(type, ctx->uris.rdfs_Literal)) { - return true; // Trivially true for a literal + return SERD_SUCCESS; } + // A plain literal can not have a datatype if (serd_node_equals(type, ctx->uris.rdf_PlainLiteral)) { - const SerdNode* const datatype = serd_node_datatype(node); - if (datatype) { - VERRORF(ctx, - statement, - "Literal \"%s\" should be plain, but has datatype " - "<%s>\n", - serd_node_string(node), - serd_node_string(datatype)); - return false; + if (report_check(ctx, + statement, + SERD_CHECK_PLAIN_LITERAL_DATATYPE, + !serd_node_datatype(node), + "Typed literal \"%s\" isn't a plain literal", + serd_node_string(node))) { + return SERD_ERR_INVALID; } - } else if (!is_a(ctx, type, ctx->uris.rdfs_Datatype)) { - VERRORF(ctx, - statement, - "Literal \"%s\" where instance of <%s> expected\n", - serd_node_string(node), - serd_node_string(type)); - return false; + } else if (report_check(ctx, + statement, + SERD_CHECK_LITERAL_INSTANCE, + is_a(ctx, type, ctx->uris.rdfs_Datatype), + "Literal \"%s\" isn't an instance of " NODE_FMT, + serd_node_string(node), + NODE_ARGS(type_string))) { + return SERD_ERR_INVALID; } - return literal_is_valid(ctx, statement, node, type); - } + return literal_is_valid(ctx, statement, node, type) ? SERD_SUCCESS + : SERD_ERR_INVALID; - if (serd_node_type(node) == SERD_URI) { - if (!is_subdatatype(ctx, type, ctx->uris.xsd_anyURI)) { - // Only check if type is not anyURI, since node is a URI - return check_instance_type(ctx, statement, node, type); + case SERD_URI: + if (serd_node_equals(type, ctx->uris.xsd_anyURI)) { + return SERD_SUCCESS; } - } else { - return check_instance_type(ctx, statement, node, type); + break; + + case SERD_CURIE: + case SERD_BLANK: + case SERD_VARIABLE: + break; } - return true; + return check_instance_type(ctx, check, type, statement, node, type); } static Count @@ -556,246 +832,860 @@ count_non_blanks(SerdRange* i, SerdField field) return n; } -static int -check_statement(ValidationContext* ctx, const SerdStatement* statement) -{ - int st = 0; - const URIs* uris = &ctx->uris; - const SerdNode* subj = serd_statement_subject(statement); - const SerdNode* pred = serd_statement_predicate(statement); - const SerdNode* obj = serd_statement_object(statement); - - if (serd_node_equals(pred, uris->rdf_type)) { - // Type statement, check that object is a valid instance of type - check_type(ctx, statement, subj, obj); - } - - if (!serd_model_ask(ctx->model, pred, uris->rdfs_label, 0, 0)) { - // Warn if property has no label - st = VWARNF( - ctx, statement, "Property <%s> has no label\n", serd_node_string(pred)); - } - - if (serd_node_type(obj) == SERD_LITERAL && - !literal_is_valid(ctx, statement, obj, serd_node_datatype(obj))) { - st = SERD_ERR_INVALID; - } - - // Check restrictions based on property type - if (is_a(ctx, pred, uris->owl_DatatypeProperty)) { - if (serd_node_type(obj) != SERD_LITERAL) { - st = VERROR(ctx, statement, "Datatype property with non-literal value\n"); - } - } else if (is_a(ctx, pred, uris->owl_ObjectProperty)) { - if (serd_node_type(obj) == SERD_LITERAL) { - st = VERROR(ctx, statement, "Object property with literal value\n"); - } - } else if (is_a(ctx, pred, uris->owl_FunctionalProperty)) { - SerdRange* o = serd_model_range(ctx->model, subj, pred, NULL, NULL); - const Count n = count_non_blanks(o, SERD_OBJECT); - if (n > 1) { - st = VERRORF(ctx, statement, "Functional property with %lu objects\n", n); - } - serd_range_free(o); - } else if (is_a(ctx, pred, uris->owl_InverseFunctionalProperty)) { - SerdRange* s = serd_model_range(ctx->model, NULL, pred, obj, NULL); - const Count n = count_non_blanks(s, SERD_SUBJECT); - if (n > 1) { - st = VERRORF( - ctx, statement, "Inverse functional property with %lu subjects\n", n); - } - serd_range_free(s); - } else { - SerdRange* t = serd_model_range(ctx->model, pred, uris->rdf_type, 0, 0); - - bool is_property = false; - SERD_FOREACH (s, t) { - const SerdNode* type = serd_statement_object(s); - if (is_subclass(ctx, type, uris->rdf_Property)) { - is_property = true; - break; +static SerdStatus +check_cardinality_restriction(SerdValidator* const ctx, + const SerdNode* const root_klass, + const SerdNode* const restriction, + const SerdStatement* const statement, + const SerdNode* const instance) +{ + const SerdNode* const prop = serd_model_get( + ctx->model, restriction, ctx->uris.owl_onProperty, NULL, NULL); + + const SerdStatement* const equal_statement = serd_model_get_statement( + ctx->model, restriction, ctx->uris.owl_cardinality, NULL, NULL); + + const SerdStatement* const min_statement = serd_model_get_statement( + ctx->model, restriction, ctx->uris.owl_minCardinality, NULL, NULL); + + const SerdStatement* const max_statement = serd_model_get_statement( + ctx->model, restriction, ctx->uris.owl_maxCardinality, NULL, NULL); + + if (!equal_statement && !min_statement && !max_statement) { + return SERD_SUCCESS; + } + + const SerdNode* const prop_string = string_node(ctx, prop); + const SerdNode* const klass_string = string_node(ctx, root_klass); + + SerdStatus st = SERD_SUCCESS; + const Count n_values = + (Count)serd_model_count(ctx->model, instance, prop, NULL, NULL); + + // Check owl:cardinality + if (equal_statement) { + const SerdNode* card = serd_statement_object(equal_statement); + const Count expected = strtoul(serd_node_string(card), NULL, 10); + if ((st = + report_check(ctx, + statement, + SERD_CHECK_CARDINALITY_EQUAL, + n_values == expected, + "Instance " NODE_FMT " has %lu " NODE_FMT " properties", + NODE_ARGS(instance), + n_values, + NODE_ARGS(prop_string)))) { + log_note(ctx, + equal_statement, + SERD_CHECK_CARDINALITY_EQUAL, + "A " NODE_FMT " must have exactly %lu", + NODE_ARGS(klass_string), + expected); + return st; + } + } + + // Check owl:minCardinality + if (min_statement) { + const SerdNode* card = serd_statement_object(min_statement); + const Count n_min = strtoul(serd_node_string(card), NULL, 10); + if ((st = + report_check(ctx, + statement, + SERD_CHECK_CARDINALITY_MIN, + n_values >= n_min, + "Instance " NODE_FMT " has %lu " NODE_FMT " properties", + NODE_ARGS(instance), + n_values, + NODE_ARGS(prop_string)))) { + log_note(ctx, + min_statement, + SERD_CHECK_CARDINALITY_MIN, + "A " NODE_FMT " must have at least %lu", + NODE_ARGS(klass_string), + n_min); + return st; + } + } + + // Check owl:maxCardinality + if (max_statement) { + const SerdNode* const card = serd_statement_object(max_statement); + const Count n_max = strtoul(serd_node_string(card), NULL, 10); + if ((st = + report_check(ctx, + statement, + SERD_CHECK_CARDINALITY_MAX, + n_values <= n_max, + "Instance " NODE_FMT " has %lu " NODE_FMT " properties", + NODE_ARGS(instance), + n_values, + NODE_ARGS(prop_string)))) { + log_note(ctx, + max_statement, + SERD_CHECK_CARDINALITY_MAX, + "A " NODE_FMT " must have at most %lu", + NODE_ARGS(klass_string), + n_max); + return st; + } + } + + return st; +} + +static SerdStatus +check_property_value_restriction(SerdValidator* const ctx, + const SerdNode* const root_klass, + const SerdNode* const restriction, + const SerdStatement* const statement, + const SerdNode* const instance) +{ + SerdStatus st = SERD_SUCCESS; + + const SerdNode* const prop = serd_model_get( + ctx->model, restriction, ctx->uris.owl_onProperty, NULL, NULL); + + const SerdStatement* const all_statement = serd_model_get_statement( + ctx->model, restriction, ctx->uris.owl_allValuesFrom, NULL, NULL); + + const SerdStatement* const some_statement = serd_model_get_statement( + ctx->model, restriction, ctx->uris.owl_someValuesFrom, NULL, NULL); + + if (!all_statement && !some_statement) { + return SERD_SUCCESS; + } + + const SerdNode* const prop_string = string_node(ctx, prop); + const SerdNode* const klass_string = string_node(ctx, root_klass); + + SerdRange* const values = + serd_model_range(ctx->model, instance, prop, NULL, NULL); + + if (all_statement) { + const SerdNode* const type = serd_statement_object(all_statement); + const SerdNode* const type_string = string_node(ctx, type); + SERD_FOREACH (v, values) { + const SerdNode* const value = serd_statement_object(v); + const SerdStatus all_st = report_check( + ctx, + v, + SERD_CHECK_ALL_VALUES_FROM, + !check_type(ctx, SERD_CHECK_ALL_VALUES_FROM, v, value, type), + "Value isn't a " NODE_FMT, + NODE_ARGS(type_string)); + + if (all_st) { + st = merge_status(st, all_st); + log_note(ctx, + all_statement, + SERD_CHECK_ALL_VALUES_FROM, + "Required for any " NODE_FMT " of a " NODE_FMT, + NODE_ARGS(prop_string), + NODE_ARGS(klass_string)); } } + } - if (!is_property) { - st = VERROR(ctx, statement, "Use of undefined property\n"); + if (some_statement) { + const SerdNode* const type = serd_statement_object(some_statement); + const SerdNode* const type_string = string_node(ctx, type); + + // Search for some value with the required type + bool found = false; + { + ctx->suppressed = true; + SERD_FOREACH_NODE (SERD_OBJECT, value, values) { + if (!check_type( + ctx, SERD_CHECK_SOME_VALUES_FROM, statement, value, type)) { + found = true; + break; + } + } + ctx->suppressed = false; } - serd_range_free(t); + const SerdStatus some_st = + report_check(ctx, + statement, + SERD_CHECK_SOME_VALUES_FROM, + found, + NODE_FMT " has no " NODE_FMT " that is a " NODE_FMT, + NODE_ARGS(instance), + NODE_ARGS(prop_string), + NODE_ARGS(type_string)); + + if (some_st) { + log_note(ctx, + some_statement, + SERD_CHECK_SOME_VALUES_FROM, + "An instance of " NODE_FMT " must have at least 1", + NODE_ARGS(klass_string)); + } + + st = merge_status(st, some_st); } - // Check range - SerdRange* r = serd_model_range(ctx->model, pred, uris->rdfs_range, 0, 0); - SERD_FOREACH (s, r) { - const SerdNode* range = serd_statement_object(s); - if (!has_explicit_type(ctx, obj, range) && - !check_type(ctx, statement, obj, range)) { - VNOTEF( - ctx, serd_range_front(r), "In range of <%s>\n", serd_node_string(pred)); + serd_range_free(values); + + return st; +} + +static SerdStatus +check_class_restriction(SerdValidator* const ctx, + const SerdNode* const root_klass, + const SerdNode* const restriction, + const SerdStatement* const statement, + const SerdNode* const instance) +{ + SerdStatus st = SERD_SUCCESS; + + st = merge_status(st, + check_cardinality_restriction( + ctx, root_klass, restriction, statement, instance)); + + st = merge_status(st, + check_property_value_restriction( + ctx, root_klass, restriction, statement, instance)); + + return st; +} + +/* Top-Level Checks */ + +static SerdStatus +check_class_label(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + const URIs* const uris = &ctx->uris; + SerdStatus st = SERD_SUCCESS; + + // For each rdfs:Class + SerdRange* const klasses = + serd_model_range(model, NULL, uris->rdf_type, uris->rdfs_Class, ctx->graph); + SERD_FOREACH (k, klasses) { + const SerdNode* const klass = serd_statement_subject(k); + + // Check that it has an rdfs:label in the same graph + st = merge_status( + st, + report_check( + ctx, + k, + SERD_CHECK_CLASS_LABEL, + serd_model_ask(ctx->model, klass, uris->rdfs_label, 0, ctx->graph), + "Class <%s> has no label", + serd_node_string(klass))); + } + serd_range_free(klasses); + + return st; +} + +static SerdStatus +check_class_type(SerdValidator* const ctx) +{ + // For each explicit instance of a class + SerdStatus st = SERD_SUCCESS; + const SerdNode* last_klass = NULL; + SerdRange* const instances = + serd_model_range(ctx->model, NULL, ctx->uris.rdf_type, NULL, ctx->graph); + // FIXME: prefer OPS order and skip dupes + SERD_FOREACH (i, instances) { + const SerdNode* const klass = serd_statement_object(i); + if (serd_node_equals(klass, last_klass)) { + continue; + } + + const bool defined = serd_model_ask(ctx->model, klass, NULL, NULL, NULL); + + st = merge_status(st, + report_check(ctx, + i, + SERD_CHECK_CLASS_TYPE, + defined, + "Undefined class <%s>", + serd_node_string(klass))); + + if (defined) { + st = merge_status( + st, + report_check( + ctx, + i, + SERD_CHECK_CLASS_TYPE, + serd_model_ask(ctx->model, klass, ctx->uris.rdf_type, NULL, NULL) && + is_a(ctx, klass, ctx->uris.rdfs_Class), + "<%s> isn't a class", + serd_node_string(klass))); } + + last_klass = klass; } - serd_range_free(r); + serd_range_free(instances); - // Check domain - SerdRange* d = serd_model_range(ctx->model, pred, uris->rdfs_domain, 0, 0); - SERD_FOREACH (s, d) { - const SerdNode* domain = serd_statement_object(s); - if (!has_explicit_type(ctx, subj, domain) && - !check_type(ctx, statement, subj, domain)) { - VNOTEF(ctx, - serd_range_front(d), - "In domain of <%s>\n", - serd_node_string(pred)); + return st; +} + +static SerdStatus +check_datatype_property(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + const URIs* uris = &ctx->uris; + SerdStatus st = SERD_SUCCESS; + + // For each owl:DatatypeProperty + SerdRange* const properties = serd_model_range( + model, NULL, uris->rdf_type, uris->owl_DatatypeProperty, NULL); + SERD_FOREACH (p, properties) { + const SerdNode* const prop = serd_statement_subject(p); + const SerdNode* const prop_string = string_node(ctx, prop); + + // For each statement of this property in the target graph + SerdRange* const statements = + serd_model_range(model, NULL, prop, NULL, ctx->graph); + SERD_FOREACH (s, statements) { + const SerdNode* const object = serd_statement_object(s); + + // Check that the object is a literal + if ((st = report_check(ctx, + s, + SERD_CHECK_DATATYPE_PROPERTY, + serd_node_type(object) == SERD_LITERAL, + NODE_FMT " isn't a literal", + NODE_ARGS(serd_statement_object(s))))) { + log_note(ctx, + p, + SERD_CHECK_DATATYPE_PROPERTY, + "A " NODE_FMT " must be a literal", + NODE_ARGS(prop_string)); + } } + serd_range_free(statements); } - serd_range_free(d); + serd_range_free(properties); return st; } -static int -cardinality_error(ValidationContext* ctx, - const SerdStatement* statement, - const SerdStatement* restriction_statement, - const SerdNode* property, - const Count actual_values, - const char* comparison, - const Count expected_values) -{ - const int st = VERRORF(ctx, - statement, - "Property <%s> has %lu %s %lu values\n", - serd_node_string(property), - actual_values, - comparison, - expected_values); - VNOTE(ctx, restriction_statement, "Restriction here\n"); +static SerdStatus +check_deprecated(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For each deprecated thing + SerdRange* const things = serd_model_range( + model, NULL, ctx->uris.owl_deprecated, ctx->true_node, NULL); + SERD_FOREACH (t, things) { + const SerdNode* const thing = serd_statement_subject(t); + const SerdNode* const thing_string = string_node(ctx, thing); + + if (is_a(ctx, thing, ctx->uris.rdf_Property)) { + // For each statement of this property in the target graph + SerdRange* const statements = + serd_model_range(model, NULL, thing, NULL, ctx->graph); + SERD_FOREACH (s, statements) { + st = report_check(ctx, + s, + SERD_CHECK_DEPRECATED_PROPERTY, + false, + "Use of deprecated property"); + log_note(ctx, + t, + SERD_CHECK_DEPRECATED_PROPERTY, + "Property " NODE_FMT " is deprecated", + NODE_ARGS(thing_string)); + } + serd_range_free(statements); + + } else if (is_a(ctx, thing, ctx->uris.rdfs_Class)) { + // For each explicit instance of this class in the target graph + SerdRange* const statements = + serd_model_range(model, NULL, ctx->uris.rdf_type, thing, ctx->graph); + SERD_FOREACH (s, statements) { + st = report_check(ctx, + s, + SERD_CHECK_DEPRECATED_CLASS, + false, + "Instance of deprecated class"); + log_note(ctx, + t, + SERD_CHECK_DEPRECATED_CLASS, + "Class " NODE_FMT " is deprecated", + NODE_ARGS(thing_string)); + } + serd_range_free(statements); + } + } + serd_range_free(things); + return st; } -static int -check_property_restriction(ValidationContext* ctx, - const SerdNode* restriction, - const SerdNode* prop, - const SerdStatement* statement, - const SerdNode* instance) -{ - int st = 0; - const Count values = - (Count)serd_model_count(ctx->model, instance, prop, NULL, NULL); +static SerdStatus +check_functional_property(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + const URIs* uris = &ctx->uris; + SerdStatus st = SERD_SUCCESS; + + // For each owl:FunctionalProperty + SerdRange* const properties = serd_model_range( + model, NULL, uris->rdf_type, uris->owl_FunctionalProperty, NULL); + SERD_FOREACH (p, properties) { + const SerdNode* const prop = serd_statement_subject(p); + const SerdNode* const prop_string = string_node(ctx, prop); + + const SerdNode* last_subj = NULL; + + // For each instance with this property in the target graph + SerdRange* const statements = + serd_model_range(model, NULL, prop, NULL, ctx->graph); + SERD_FOREACH (s, statements) { + const SerdNode* const subj = serd_statement_subject(s); + if (serd_node_equals(subj, last_subj)) { + continue; + } - // Check exact cardinality - const SerdStatement* c = serd_model_get_statement( - ctx->model, restriction, ctx->uris.owl_cardinality, NULL, NULL); - if (c) { - const SerdNode* card = serd_statement_object(c); - const Count count = strtoul(serd_node_string(card), NULL, 10); - if (check(ctx, values != count)) { - st = cardinality_error(ctx, statement, c, prop, values, "!=", count); + // Count the number of values on this instance + SerdRange* const o = + serd_model_range(ctx->model, subj, prop, NULL, ctx->graph); + const Count n = count_non_blanks(o, SERD_OBJECT); + + serd_range_free(o); + if (report_check(ctx, + s, + SERD_CHECK_FUNCTIONAL_PROPERTY, + n <= 1, + "Instance has %lu " NODE_FMT " properties", + n, + NODE_ARGS(prop_string))) { + st = SERD_ERR_INVALID; + log_note(ctx, + p, + SERD_CHECK_FUNCTIONAL_PROPERTY, + "An instance may have at most 1"); + } + + last_subj = subj; } + serd_range_free(statements); } + serd_range_free(properties); - // Check minimum cardinality - const SerdStatement* l = serd_model_get_statement( - ctx->model, restriction, ctx->uris.owl_minCardinality, NULL, NULL); - if (l) { - const SerdNode* card = serd_statement_object(l); - const Count count = strtoul(serd_node_string(card), NULL, 10); - if (check(ctx, values < count)) { - st = cardinality_error(ctx, statement, l, prop, values, "<", count); + return st; +} + +// FIXME: name +static SerdStatus +check_instance(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + const URIs* uris = &ctx->uris; + SerdStatus st = SERD_SUCCESS; + + // For each rdf:type property in the target graph + SerdRange* const types = + serd_model_range(model, NULL, uris->rdf_type, NULL, ctx->graph); + SERD_FOREACH (t, types) { + const SerdNode* const instance = serd_statement_subject(t); + const SerdNode* const type = serd_statement_object(t); + const SerdNode* const type_string = string_node(ctx, type); + + if ((st = check_instance_type( + ctx, SERD_CHECK_INSTANCE_TYPE, type, t, instance, type))) { + log_note(ctx, + t, + SERD_CHECK_INSTANCE_TYPE, + "Instance is a " NODE_FMT, + NODE_ARGS(type_string)); + break; } } + serd_range_free(types); - // Check maximum cardinality - const SerdStatement* u = serd_model_get_statement( - ctx->model, restriction, ctx->uris.owl_maxCardinality, NULL, NULL); - if (u) { - const SerdNode* card = serd_statement_object(u); - const Count count = strtoul(serd_node_string(card), NULL, 10); - if (check(ctx, values > count)) { - st = cardinality_error(ctx, statement, u, prop, values, ">", count); - } - } - - // Check someValuesFrom - const SerdStatement* s = serd_model_get_statement( - ctx->model, restriction, ctx->uris.owl_someValuesFrom, 0, 0); - if (s) { - const SerdNode* some = serd_statement_object(s); - - ctx->quiet = true; - SerdRange* v = serd_model_range(ctx->model, instance, prop, NULL, NULL); - bool found = false; - SERD_FOREACH (i, v) { - const SerdNode* value = serd_statement_object(i); - if (check_type(ctx, statement, value, some)) { - found = true; - break; + return st; +} + +static SerdStatus +check_inverse_functional_property(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + const URIs* uris = &ctx->uris; + SerdStatus st = SERD_SUCCESS; + + // For each owl:InverseFunctionalProperty + SerdRange* const properties = serd_model_range( + model, NULL, uris->rdf_type, uris->owl_InverseFunctionalProperty, NULL); + SERD_FOREACH (p, properties) { + const SerdNode* const prop = serd_statement_subject(p); + const SerdNode* const prop_string = string_node(ctx, prop); + + const SerdNode* last_obj = NULL; + + // For each value of this property in the target graph + SerdRange* const statements = + serd_model_range(model, NULL, prop, NULL, ctx->graph); + SERD_FOREACH (statement, statements) { + const SerdNode* const obj = serd_statement_object(statement); + const SerdNode* const obj_string = string_node(ctx, obj); + if (serd_node_equals(obj, last_obj)) { + continue; } + + // Count the number of subjects with this value in the target graph + SerdRange* s = serd_model_range(ctx->model, NULL, prop, obj, ctx->graph); + const Count n = count_non_blanks(s, SERD_SUBJECT); + + if (n > 1) { + // Get the range again so we can print a note for every value + serd_range_free(s); + s = serd_model_range(ctx->model, NULL, prop, obj, ctx->graph); + + SERD_FOREACH (value_statement, s) { + const SerdNode* const subj = serd_statement_subject(value_statement); + const SerdNode* const subj_string = string_node(ctx, subj); + + report_check(ctx, + value_statement, + SERD_CHECK_INVERSE_FUNCTIONAL_PROPERTY, + false, + "Instance " NODE_FMT " shares the " NODE_FMT + " " NODE_FMT, + NODE_ARGS(subj_string), + NODE_ARGS(prop_string), + NODE_ARGS(obj_string)); + } + + log_note(ctx, + p, + SERD_CHECK_INVERSE_FUNCTIONAL_PROPERTY, + "At most 1 instance may have a given " NODE_FMT, + NODE_ARGS(prop_string)); + } + + serd_range_free(s); + last_obj = obj; } - ctx->quiet = false; + serd_range_free(statements); + } + serd_range_free(properties); - if (check(ctx, !found)) { - st = VERRORF(ctx, - statement, - "%s has no <%s> values of type <%s>\n", - serd_node_string(instance), - serd_node_string(prop), - serd_node_string(some)); - VNOTE(ctx, s, "Restriction here\n"); - } - serd_range_free(v); - } - - // Check allValuesFrom - const SerdStatement* a = serd_model_get_statement( - ctx->model, restriction, ctx->uris.owl_allValuesFrom, 0, 0); - if (a) { - ++ctx->n_restrictions; - const SerdNode* all = serd_statement_object(a); - - SerdRange* v = serd_model_range(ctx->model, instance, prop, NULL, NULL); - SERD_FOREACH (i, v) { - const SerdNode* value = serd_statement_object(i); - if (!check_type(ctx, statement, value, all)) { - st = VERRORF(ctx, - i, - "<%s> value not of type <%s>\n", - serd_node_string(prop), - serd_node_string(all)); - VNOTE(ctx, a, "Restriction here\n"); - break; + return st; +} + +static SerdStatus +check_object_property(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For each owl:ObjectProperty + SerdRange* const properties = serd_model_range( + model, NULL, ctx->uris.rdf_type, ctx->uris.owl_ObjectProperty, NULL); + SERD_FOREACH_NODE (SERD_SUBJECT, prop, properties) { + const SerdNode* const prop_string = string_node(ctx, prop); + + // For each statement of this property in the target graph + SerdRange* const statements = + serd_model_range(model, NULL, prop, NULL, ctx->graph); + SERD_FOREACH (s, statements) { + if (report_check(ctx, + s, + SERD_CHECK_OBJECT_PROPERTY, + serd_node_type(serd_statement_object(s)) != SERD_LITERAL, + "Object property has literal value")) { + st = SERD_ERR_INVALID; + log_note(ctx, + serd_range_front(properties), + SERD_CHECK_OBJECT_PROPERTY, + "A " NODE_FMT " must be an instance", + NODE_ARGS(prop_string)); } } - serd_range_free(v); + serd_range_free(statements); } + serd_range_free(properties); return st; } -static int -check_class_restriction(ValidationContext* ctx, - const SerdNode* restriction, - const SerdStatement* statement, - const SerdNode* instance) +static SerdStatus +check_property_domain(SerdValidator* const ctx) { - const SerdNode* prop = serd_model_get( - ctx->model, restriction, ctx->uris.owl_onProperty, NULL, NULL); + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For each property with an rdfs:domain + SerdRange* const properties = + serd_model_range(model, NULL, ctx->uris.rdfs_domain, NULL, NULL); + SERD_FOREACH (p, properties) { + const SerdNode* const prop = serd_statement_subject(p); + const SerdNode* const prop_string = string_node(ctx, prop); + const SerdNode* const domain = serd_statement_object(p); + const SerdNode* const domain_string = string_node(ctx, domain); + + // For each statement of this property in the target graph + SerdRange* const statements = + serd_model_range(model, NULL, prop, NULL, ctx->graph); + SERD_FOREACH (statement, statements) { + const SerdNode* const subj = serd_statement_subject(statement); + + // Check that the subject is in the domain + if (check_instance_type( + ctx, SERD_CHECK_PROPERTY_DOMAIN, domain, statement, subj, domain)) { + log_note(ctx, + p, + SERD_CHECK_PROPERTY_DOMAIN, + "An instance with a " NODE_FMT " must be a " NODE_FMT, + NODE_ARGS(prop_string), + NODE_ARGS(domain_string)); + } + } + serd_range_free(statements); + } + serd_range_free(properties); - return prop ? check_property_restriction( - ctx, restriction, prop, statement, instance) - : 0; + return st; } -static void -init_uris(URIs* uris) +static SerdStatus +check_property_label(SerdValidator* const ctx) { -#define URI(prefix, suffix) \ - uris->prefix##_##suffix = \ - serd_new_uri(SERD_STATIC_STRING(NS_##prefix #suffix)) + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For each ?property a rdf:Property in the target graph + SerdRange* const properties = serd_model_range( + model, NULL, ctx->uris.rdf_type, ctx->uris.rdf_Property, ctx->graph); + SERD_FOREACH (p, properties) { + const SerdNode* const property = serd_statement_subject(p); + + update_status( + &st, + report_check(ctx, + p, + SERD_CHECK_PROPERTY_LABEL, + serd_model_ask( + ctx->model, property, ctx->uris.rdfs_label, 0, ctx->graph), + "Property <%s> has no label", + serd_node_string(property))); + } + serd_range_free(properties); + + return st; +} + +static SerdStatus +check_property_range(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For each property with an rdfs:range + SerdRange* const properties = + serd_model_range(model, NULL, ctx->uris.rdfs_range, NULL, NULL); + SERD_FOREACH (p, properties) { + const SerdNode* const prop = serd_statement_subject(p); + const SerdNode* const klass = serd_statement_object(p); + const SerdNode* const prop_string = string_node(ctx, prop); + + // For each statement of this property in the target graph + SerdRange* const statements = + serd_model_range(model, NULL, prop, NULL, ctx->graph); + SERD_FOREACH (statement, statements) { + const SerdNode* const obj = serd_statement_object(statement); + + // Check that the object is in the range + if (check_type(ctx, SERD_CHECK_PROPERTY_RANGE, statement, obj, klass)) { + log_note(ctx, + p, + SERD_CHECK_PROPERTY_RANGE, + "Required for any " NODE_FMT " value", + NODE_ARGS(prop_string)); + } + } + serd_range_free(statements); + } + serd_range_free(properties); + + return st; +} + +static SerdStatus +check_predicate_type(SerdValidator* const ctx) +{ + // For each predicate + SerdStatus st = SERD_SUCCESS; + const SerdNode* last_pred = NULL; + // FIXME: graph + SerdRange* const all = serd_model_all(ctx->model, SERD_ORDER_POS); + SERD_FOREACH (s, all) { + const SerdNode* const pred = serd_statement_predicate(s); + if (serd_node_equals(pred, last_pred)) { + continue; + } + + const bool defined = serd_model_ask(ctx->model, pred, NULL, NULL, NULL); + + st = merge_status(st, + report_check(ctx, + s, + SERD_CHECK_PREDICATE_TYPE, + defined, + "Undefined property <%s>", + serd_node_string(pred))); + + if (defined) { + st = merge_status( + st, + report_check( + ctx, + s, + SERD_CHECK_PREDICATE_TYPE, + serd_model_ask(ctx->model, pred, ctx->uris.rdf_type, NULL, NULL) && + is_a(ctx, pred, ctx->uris.rdf_Property), + "<%s> isn't a property", + serd_node_string(pred))); + } + + last_pred = pred; + } + serd_range_free(all); + + return st; +} + +static SerdStatus +check_acyclic(SerdValidator* const ctx, + const SerdValidatorCheck check, + const SerdNode* const root, + const SerdNode* const node, + const SerdNode* const property, + const char* const fmt) +{ + SerdStatus st = SERD_SUCCESS; + + // FIXME: graph + SerdRange* const links = + serd_model_range(ctx->model, node, property, NULL, NULL); + SERD_FOREACH (l, links) { + const SerdNode* const object = serd_statement_object(l); + const SerdNode* const object_string = string_node(ctx, object); + + if ((st = report_check(ctx, + l, + check, + !serd_node_equals(object, root), + fmt, + NODE_ARGS(object_string)))) { + break; + } + + if ((st = check_acyclic(ctx, check, root, object, property, fmt))) { + log_note(ctx, l, check, "Via " NODE_FMT, NODE_ARGS(object_string)); + break; + } + } + serd_range_free(links); + + return st; +} + +static SerdStatus +check_subclass_cycle(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For each subclass + SerdRange* const properties = + serd_model_range(model, NULL, ctx->uris.rdfs_subClassOf, NULL, NULL); + SERD_FOREACH_NODE (SERD_SUBJECT, root, properties) { + st = merge_status(st, + check_acyclic(ctx, + SERD_CHECK_CLASS_CYCLE, + root, + root, + ctx->uris.rdfs_subClassOf, + "Class " NODE_FMT + " is a sub-class of itself")); + } + serd_range_free(properties); + + return st; +} + +static SerdStatus +check_subproperty_cycle(SerdValidator* const ctx) +{ + const SerdModel* const model = ctx->model; + SerdStatus st = SERD_SUCCESS; + + // For each subproperty relation + SerdRange* const properties = + serd_model_range(model, NULL, ctx->uris.rdfs_subPropertyOf, NULL, NULL); + SERD_FOREACH_NODE (SERD_SUBJECT, root, properties) { + st = merge_status(st, + check_acyclic(ctx, + SERD_CHECK_PROPERTY_CYCLE, + root, + root, + ctx->uris.rdfs_subPropertyOf, + "Property " NODE_FMT + " is a sub-property of itself")); + } + serd_range_free(properties); + + return st; +} + +/* Statement Checks */ + +static SerdStatus +statement_check_valid_literal(SerdValidator* const ctx, + const SerdStatement* const statement) +{ + const SerdNode* const object = serd_statement_object(statement); + if (serd_node_type(object) != SERD_LITERAL) { + return SERD_SUCCESS; + } + + if (!literal_is_valid(ctx, statement, object, serd_node_datatype(object))) { + /* log_note(ctx, l, check, "Via " NODE_FMT, NODE_ARGS(object_string)); */ + + return SERD_ERR_INVALID; + } + + return SERD_SUCCESS; +} + +/* Entry Points */ + +SerdValidator* +serd_validator_new(SerdWorld* const world) +{ + SerdValidator* const validator = + (SerdValidator*)calloc(1, sizeof(SerdValidator)); + + if (!validator) { + return NULL; + } + + SerdNodes* const nodes = world->nodes; + + validator->world = world; + validator->true_node = serd_nodes_manage(nodes, serd_new_boolean(true)); + +#define URI(prefix, suffix) \ + validator->uris.prefix##_##suffix = serd_nodes_manage( \ + nodes, serd_new_uri(SERD_STATIC_STRING(NS_##prefix #suffix))) URI(owl, Class); URI(owl, DatatypeProperty); + URI(owl, DeprecatedClass); + URI(owl, DeprecatedProperty); URI(owl, FunctionalProperty); URI(owl, InverseFunctionalProperty); URI(owl, ObjectProperty); @@ -803,6 +1693,7 @@ init_uris(URIs* uris) URI(owl, Thing); URI(owl, allValuesFrom); URI(owl, cardinality); + URI(owl, deprecated); URI(owl, equivalentClass); URI(owl, maxCardinality); URI(owl, minCardinality); @@ -823,43 +1714,177 @@ init_uris(URIs* uris) URI(rdfs, label); URI(rdfs, range); URI(rdfs, subClassOf); + URI(rdfs, subPropertyOf); URI(xsd, anyURI); - URI(xsd, float); - URI(xsd, decimal); - URI(xsd, double); URI(xsd, maxExclusive); URI(xsd, maxInclusive); URI(xsd, minExclusive); URI(xsd, minInclusive); URI(xsd, pattern); - URI(xsd, string); + +#undef URI + + return validator; +} + +void +serd_validator_free(SerdValidator* const validator) +{ + free(validator); } SerdStatus -serd_validate(const SerdModel* model) +serd_validator_enable_checks(SerdValidator* const validator, + const char* const regex) { - ValidationContext ctx; - memset(&ctx, 0, sizeof(ValidationContext)); - init_uris(&ctx.uris); + if (!strcmp(regex, "all")) { + return serd_validator_enable_checks(validator, ".*"); + } + + RerexPattern* const re = parse_regex(validator->world, NULL, regex); + if (!re) { + return SERD_ERR_BAD_ARG; + } - ctx.model = model; - ctx.n_errors = 0; - ctx.n_restrictions = 0; + bool matched = false; + RerexMatcher* matcher = rerex_new_matcher(re); - int st = 0; - SerdRange* i = serd_model_all(ctx.model); - SERD_FOREACH (statement, i) { - st = check_statement(&ctx, statement) || st; + for (unsigned i = 0; i < N_CHECKS; ++i) { + if (rerex_match(matcher, check_names[i])) { + validator->checks |= (1u << i); + matched = true; + } } - serd_range_free(i); - printf("Found %u errors (checked %u restrictions)\n", - ctx.n_errors, - ctx.n_restrictions); + rerex_free_matcher(matcher); + rerex_free_pattern(re); - for (SerdNode** n = (SerdNode**)&ctx.uris; *n; ++n) { - serd_node_free(*n); + return matched ? SERD_SUCCESS : SERD_FAILURE; +} + +SerdStatus +serd_validator_disable_checks(SerdValidator* const validator, + const char* const regex) +{ + RerexPattern* const re = parse_regex(validator->world, NULL, regex); + if (!re) { + return SERD_ERR_BAD_ARG; + } + + bool matched = false; + RerexMatcher* matcher = rerex_new_matcher(re); + + for (unsigned i = 0; i < N_CHECKS; ++i) { + if (rerex_match(matcher, check_names[i])) { + validator->checks &= ~(1u << i); + matched = true; + } + } + + rerex_free_matcher(matcher); + rerex_free_pattern(re); + + return matched ? SERD_SUCCESS : SERD_FAILURE; +} + +SerdStatus +serd_validate_model(SerdValidator* const validator, + const SerdModel* const model, + const SerdNode* const graph) +{ + SerdValidator* const ctx = validator; + SerdStatus st = SERD_SUCCESS; + + ctx->model = model; + ctx->graph = graph; + + if (ctx->checks & (1u << SERD_CHECK_PREDICATE_TYPE)) { + update_status(&st, check_predicate_type(ctx)); + } + + if (ctx->checks & (1u << SERD_CHECK_CLASS_CYCLE)) { + update_status(&st, check_subclass_cycle(ctx)); + } + + if (ctx->checks & (1u << SERD_CHECK_CLASS_LABEL)) { + update_status(&st, check_class_label(ctx)); + } + + if (ctx->checks & (1u << SERD_CHECK_CLASS_TYPE)) { + update_status(&st, check_class_type(ctx)); + } + + if (ctx->checks & (1u << SERD_CHECK_DATATYPE_PROPERTY)) { + update_status(&st, check_datatype_property(ctx)); + } + + if (ctx->checks & ((1u << SERD_CHECK_DEPRECATED_PROPERTY) | + (1u << SERD_CHECK_DEPRECATED_CLASS))) { + update_status(&st, check_deprecated(ctx)); + } + + if (ctx->checks & (1u << SERD_CHECK_FUNCTIONAL_PROPERTY)) { + update_status(&st, check_functional_property(ctx)); + } + + if (ctx->checks & (1u << SERD_CHECK_INSTANCE_TYPE)) { + update_status(&st, check_instance(ctx)); + } + + if (ctx->checks & (1u << SERD_CHECK_INVERSE_FUNCTIONAL_PROPERTY)) { + update_status(&st, check_inverse_functional_property(ctx)); + } + + if (ctx->checks & (1u << SERD_CHECK_OBJECT_PROPERTY)) { + update_status(&st, check_object_property(ctx)); + } + + if (ctx->checks & (1u << SERD_CHECK_PROPERTY_CYCLE)) { + update_status(&st, check_subproperty_cycle(ctx)); + } + + if (ctx->checks & (1u << SERD_CHECK_PROPERTY_DOMAIN)) { + update_status(&st, check_property_domain(ctx)); + } + + if (ctx->checks & (1u << SERD_CHECK_PROPERTY_LABEL)) { + update_status(&st, check_property_label(ctx)); + } + + if (ctx->checks & (1u << SERD_CHECK_PROPERTY_RANGE)) { + update_status(&st, check_property_range(ctx)); + } + + if (ctx->checks & ((1u << SERD_CHECK_DATATYPE_TYPE) | // + (1u << SERD_CHECK_LITERAL_INSTANCE) | + (1u << SERD_CHECK_LITERAL_MAX_EXCLUSIVE) | + (1u << SERD_CHECK_LITERAL_MAX_INCLUSIVE) | + (1u << SERD_CHECK_LITERAL_MIN_EXCLUSIVE) | + (1u << SERD_CHECK_LITERAL_MIN_INCLUSIVE) | + (1u << SERD_CHECK_LITERAL_PATTERN) | + (1u << SERD_CHECK_LITERAL_RESTRICTION) | + (1u << SERD_CHECK_LITERAL_VALUE))) { + SerdRange* const all = serd_model_all(ctx->model, SERD_ORDER_SPO); + SERD_FOREACH (statement, all) { + update_status(&st, statement_check_valid_literal(ctx, statement)); + } + serd_range_free(all); } - return !st && ctx.n_errors == 0 ? SERD_SUCCESS : SERD_ERR_INVALID; + ctx->graph = NULL; + + return (ctx->n_errors > 0) + ? serd_world_logf_internal(ctx->model->world, + SERD_ERR_INVALID, + SERD_LOG_LEVEL_ERR, + NULL, + "Failed %u of %u validation checks", + ctx->n_errors, + ctx->n_checks) + : serd_world_logf_internal(ctx->model->world, + SERD_SUCCESS, + SERD_LOG_LEVEL_INFO, + NULL, + "Passed all %u validation checks", + ctx->n_checks); } diff --git a/src/world.c b/src/world.c index a6e32dc6..1ea688cb 100644 --- a/src/world.c +++ b/src/world.c @@ -85,7 +85,7 @@ serd_ansi_reset(FILE* stream) #endif } -static const char* const log_level_strings[] = {"emergengy", +static const char* const log_level_strings[] = {"emergency", "alert", "critical", "error", @@ -159,6 +159,14 @@ serd_world_vlogf(const SerdWorld* world, // Using a copy isn't necessary here, but it avoids a clang-tidy bug vfprintf(stderr, fmt, ap); + + // Print clang-tidy-style check suffix + const char* const check = serd_log_entry_get_field(&e, "SERD_CHECK"); + if (check) { + fprintf(stderr, " [%s]", check); + } + + fprintf(stderr, "\n"); } va_end(ap); |