aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2018-05-27 15:48:25 +0200
committerDavid Robillard <d@drobilla.net>2018-05-27 21:10:21 +0200
commit7f75e996c630e96142b842be5f5e193a5e93e634 (patch)
tree60ddd2c8594b38cabd441ad54397313a88a4abd4
parent7a4582a570657cee6e62842a98a0c7be3da7cb03 (diff)
downloadserd-7f75e996c630e96142b842be5f5e193a5e93e634.tar.gz
serd-7f75e996c630e96142b842be5f5e193a5e93e634.tar.bz2
serd-7f75e996c630e96142b842be5f5e193a5e93e634.zip
WIP: Add serd_validatemodel
-rw-r--r-- src/serd_validate.c 884
-rw-r--r-- wscript 46
2 files changed, 918 insertions, 12 deletions
diff --git a/src/serd_validate.c b/src/serd_validate.c
new file mode 100644
index 00000000..a190f99d
--- /dev/null
+++ b/src/serd_validate.c
@@ -0,0 +1,884 @@
+/*
+ Copyright 2012-2018 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#define _BSD_SOURCE 1 // for realpath
+#define _DEFAULT_SOURCE 1 // for realpath
+
+#include "serd/serd.h"
+#include "serd/serd.h"
+#include "serd_config.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+#ifdef HAVE_PCRE
+#include <pcre.h>
+#endif
+
+#define USTR(s) ((const uint8_t*)(s))
+
+#define NS_foaf "http://xmlns.com/foaf/0.1/"
+#define NS_owl "http://www.w3.org/2002/07/owl#"
+#define NS_rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+#define NS_rdfs "http://www.w3.org/2000/01/rdf-schema#"
+#define NS_xsd "http://www.w3.org/2001/XMLSchema#"
+
+#define ERROR(msg) fprintf(stderr, "serd_validate: " msg);
+#define ERRORF(fmt, ...) fprintf(stderr, "serd_validate: " fmt, __VA_ARGS__);
+
+#define SERD_FOREACH(name, iter) \
+ for (const SerdStatement* name = NULL; \
+ !serd_iter_end(iter) && (name = serd_iter_get(iter)); \
+ serd_iter_next(iter))
+
+/* URI nodes for the vocabulary terms the validator refers to.  Each member is
+   named <prefix>_<term>; main() fills every member in with its URI() macro,
+   which builds the node from the matching NS_<prefix> namespace string. */
+typedef struct
+{
+ // FOAF
+ SerdNode* foaf_Document;
+ // OWL
+ SerdNode* owl_AnnotationProperty;
+ SerdNode* owl_Class;
+ SerdNode* owl_DatatypeProperty;
+ SerdNode* owl_FunctionalProperty;
+ SerdNode* owl_InverseFunctionalProperty;
+ SerdNode* owl_ObjectProperty;
+ SerdNode* owl_OntologyProperty;
+ SerdNode* owl_Restriction;
+ SerdNode* owl_Thing;
+ SerdNode* owl_cardinality;
+ SerdNode* owl_equivalentClass;
+ SerdNode* owl_maxCardinality;
+ SerdNode* owl_minCardinality;
+ SerdNode* owl_onDatatype;
+ SerdNode* owl_onProperty;
+ SerdNode* owl_someValuesFrom;
+ SerdNode* owl_withRestrictions;
+ // RDF
+ SerdNode* rdf_PlainLiteral;
+ SerdNode* rdf_Property;
+ SerdNode* rdf_first;
+ SerdNode* rdf_rest;
+ SerdNode* rdf_type;
+ // RDFS
+ SerdNode* rdfs_Class;
+ SerdNode* rdfs_Datatype;
+ SerdNode* rdfs_Literal;
+ SerdNode* rdfs_Resource;
+ SerdNode* rdfs_domain;
+ SerdNode* rdfs_label;
+ SerdNode* rdfs_range;
+ SerdNode* rdfs_subClassOf;
+ // XSD
+ SerdNode* xsd_anyURI;
+ SerdNode* xsd_decimal;
+ SerdNode* xsd_double;
+ SerdNode* xsd_maxInclusive;
+ SerdNode* xsd_minInclusive;
+ SerdNode* xsd_pattern;
+ SerdNode* xsd_string;
+} URIs;
+
+int n_errors = 0;
+int n_restrictions = 0;
+bool one_line_errors = false;
+
+// Forward declaration: check_resource_type() calls check_instance(), which is
+// defined further down because it in turn calls check_type().
+static int
+check_instance(SerdModel* model,
+ const URIs* uris,
+ const SerdNode* restriction,
+ const SerdStatement* statement,
+ const SerdNode* instance);
+
+/** Print the program version and licence notice to stdout; always returns 0. */
+static int
+print_version(void)
+{
+    static const char* const notice =
+        "Copyright 2012-2018 David Robillard <http://drobilla.net>.\n"
+        "License: <http://www.opensource.org/licenses/isc>\n"
+        "This is free software; you are free to change and redistribute it."
+        "\nThere is NO WARRANTY, to the extent permitted by law.\n";
+
+    printf("serd_validate " SERD_VERSION
+           " <http://drobilla.net/software/serd>\n");
+    fputs(notice, stdout);
+    return 0;
+}
+
+/**
+   Print the command line help.
+
+   @param name Program name shown in the usage line.
+   @param error If true, print to stderr and return 1, otherwise print to
+   stdout and return 0.
+   @return The exit status the caller should use.
+*/
+static int
+print_usage(const char* name, bool error)
+{
+    FILE* const os = error ? stderr : stdout;
+    fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
+    fprintf(os, "Validate RDF data\n\n");
+    fprintf(os, " -h Display this help and exit\n");
+    // -k is accepted by main() and so must be listed here as well
+    fprintf(os, " -k BYTES Parser stack size\n");
+    fprintf(os, " -l Print errors on a single line.\n");
+    fprintf(os, " -v Display version information and exit\n");
+    fprintf(os,
+            "Validate RDF data. This is a simple validator which checks\n"
+            "that all used properties are actually defined. It does not do\n"
+            "any fancy file retrieval, the files passed on the command line\n"
+            "are the only data that is read. In other words, you must pass\n"
+            "the definition of all vocabularies used on the command line.\n");
+    return error ? 1 : 0;
+}
+
+/**
+   Return the absolute form of `path` as a newly allocated string.
+
+   @return A string the caller must free(), or NULL if the path could not be
+   resolved or memory could not be allocated.
+*/
+static char*
+absolute_path(const char* path)
+{
+#ifdef _WIN32
+    char* out = (char*)malloc(MAX_PATH);
+    if (!out) {
+        return NULL;
+    }
+
+    /* GetFullPathName returns 0 on failure, and the required buffer size if
+       MAX_PATH is too small; in both cases `out` holds nothing usable. */
+    const DWORD len = GetFullPathName(path, MAX_PATH, out, NULL);
+    if (len == 0 || len >= MAX_PATH) {
+        free(out);
+        return NULL;
+    }
+
+    return out;
+#else
+    return realpath(path, NULL);
+#endif
+}
+
+/**
+   Report a validation error for `statement` on stderr.
+
+   Prints "error: ", the printf-style message, then the subject, predicate and
+   object of the statement, each preceded by a separator that depends on
+   `one_line_errors`.  Increments `n_errors`.
+
+   @return Always 1, so callers can assign the result to a status variable.
+*/
+static int
+errorf(const SerdStatement* statement, const char* fmt, ...)
+{
+    // The caller's message comes first
+    fprintf(stderr, "error: ");
+    va_list ap;
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+
+    // Then the offending statement, one node after each separator
+    const char* const subject =
+        serd_node_get_string(serd_statement_get_subject(statement));
+    const char* const predicate =
+        serd_node_get_string(serd_statement_get_predicate(statement));
+    const char* const object =
+        serd_node_get_string(serd_statement_get_object(statement));
+    const char* const delim = one_line_errors ? "\t" : "\n ";
+
+    fprintf(stderr,
+            "%s%s%s%s%s%s\n",
+            delim,
+            subject,
+            delim,
+            predicate,
+            delim,
+            object);
+
+    n_errors += 1;
+    return 1;
+}
+
+/* One entry of the chain of nodes currently being expanded by
+   is_descendant_of(); it lives on the stack of the recursion. */
+typedef struct AncestorLink
+{
+    const SerdNode*            node;
+    const struct AncestorLink* up;
+} AncestorLink;
+
+/* Return true if `node` equals any node on the chain starting at `path`. */
+static bool
+ancestor_path_contains(const AncestorLink* path, const SerdNode* node)
+{
+    for (; path; path = path->up) {
+        if (serd_node_equals(path->node, node)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/* Recursive worker for is_descendant_of(); `path` is the chain of nodes
+   already being expanded, which are skipped so that cycles terminate. */
+static bool
+is_descendant_of_from(SerdModel*          model,
+                      const URIs*         uris,
+                      const SerdNode*     child,
+                      const SerdNode*     parent,
+                      const SerdNode*     pred,
+                      const AncestorLink* path)
+{
+    if (!child) {
+        return false;
+    } else if (serd_node_equals(child, parent) ||
+               serd_model_ask(
+                   model, child, uris->owl_equivalentClass, parent, NULL)) {
+        return true;
+    }
+
+    const AncestorLink here  = {child, path};
+    bool               found = false;
+    SerdIter*          i     = serd_model_find(model, child, pred, NULL, NULL);
+    SERD_FOREACH (s, i) {
+        const SerdNode* o = serd_statement_get_object(s);
+        if (ancestor_path_contains(&here, o)) {
+            /* Already being expanded further up (this includes a class that
+               is explicitly a descendant of itself).  Anything reachable
+               through it is found by that earlier expansion, so following it
+               again could only loop forever. */
+            continue;
+        }
+        if (is_descendant_of_from(model, uris, o, parent, pred, &here)) {
+            found = true;
+            break;
+        }
+    }
+    serd_iter_free(i);
+
+    return found;
+}
+
+/**
+   Return true if `child` is `parent` or descends from it.
+
+   A node descends from `parent` if it equals it, is declared
+   owl:equivalentClass to it, or reaches such a node by following `pred`
+   statements (from subject to object) any number of times.  Cycles in the
+   `pred` graph are tolerated.
+
+   @param child Node to start from; NULL yields false.
+   @param parent Node to look for.
+   @param pred Predicate to follow.
+*/
+static bool
+is_descendant_of(SerdModel*      model,
+                 const URIs*     uris,
+                 const SerdNode* child,
+                 const SerdNode* parent,
+                 const SerdNode* pred)
+{
+    return is_descendant_of_from(model, uris, child, parent, pred, NULL);
+}
+
+/**
+   Return true if the whole of `str` matches the regular expression `pat`.
+
+   Without PCRE support nothing can be checked, so every string is accepted.
+   With PCRE, false is also returned (after printing a message to stderr) if
+   the pattern does not compile or memory runs out.
+*/
+static bool
+regexp_match(const char* pat, const char* str)
+{
+#ifdef HAVE_PCRE
+    /* Wrap the pattern as "(?:pat)$" so that, together with PCRE_ANCHORED, it
+       only matches the entire string.  The group is required because a bare
+       trailing '$' would bind to the last alternative only, e.g. "a|b$"
+       accepts any string that merely starts with "a". */
+    static const char prefix[]   = "(?:";
+    static const char suffix[]   = ")$";
+    const size_t      prefix_len = sizeof(prefix) - 1;
+    const size_t      suffix_len = sizeof(suffix) - 1;
+    const size_t      pat_len    = strlen(pat);
+
+    char* const regx = (char*)malloc(prefix_len + pat_len + suffix_len + 1);
+    if (!regx) {
+        fprintf(stderr, "Failed to allocate memory for pattern `%s'\n", pat);
+        return false;
+    }
+
+    memcpy(regx, prefix, prefix_len);
+    memcpy(regx + prefix_len, pat, pat_len);
+    memcpy(regx + prefix_len + pat_len, suffix, suffix_len + 1);
+
+    // PCRE_DOLLAR_ENDONLY stops '$' from also matching before a final newline
+    const char* err       = NULL;
+    int         erroffset = 0;
+    pcre*       re        = pcre_compile(regx,
+                            PCRE_ANCHORED | PCRE_DOLLAR_ENDONLY,
+                            &err,
+                            &erroffset,
+                            NULL);
+    free(regx);
+    if (!re) {
+        // Report the offset relative to the pattern as the user wrote it
+        if (erroffset >= (int)prefix_len) {
+            erroffset -= (int)prefix_len;
+        }
+        fprintf(stderr,
+                "Error in pattern `%s' at offset %d (%s)\n",
+                pat,
+                erroffset,
+                err);
+        return false;
+    }
+
+    const bool ret =
+        pcre_exec(re, NULL, str, (int)strlen(str), 0, 0, NULL, 0) >= 0;
+    pcre_free(re);
+    return ret;
+#else
+    (void)pat;
+    (void)str;
+    return true;
+#endif // HAVE_PCRE
+}
+
+/**
+   Compare `literal` with the restriction bound `bound`.
+
+   If `type` descends (via owl:onDatatype) from xsd:decimal or xsd:double the
+   two values are parsed with serd_strtod() and compared as numbers, otherwise
+   their strings are compared with strcmp().
+
+   @return Negative, zero or positive if the literal is less than, equal to
+   or greater than the bound.
+*/
+static int
+bound_cmp(SerdModel*      model,
+          const URIs*     uris,
+          const SerdNode* literal,
+          const SerdNode* type,
+          const SerdNode* bound)
+{
+    const char* const     literal_text = serd_node_get_string(literal);
+    const char* const     bound_text   = serd_node_get_string(bound);
+    const SerdNode* const via          = uris->owl_onDatatype;
+
+    if (!is_descendant_of(model, uris, type, uris->xsd_decimal, via) &&
+        !is_descendant_of(model, uris, type, uris->xsd_double, via)) {
+        return strcmp(literal_text, bound_text); // Not numeric
+    }
+
+    const double bound_value   = serd_strtod(bound_text, NULL);
+    const double literal_value = serd_strtod(literal_text, NULL);
+    if (literal_value < bound_value) {
+        return -1;
+    }
+
+    return (literal_value > bound_value) ? 1 : 0;
+}
+
+/**
+   Check `literal` against the facets of a single datatype restriction.
+
+   The xsd:pattern, xsd:minInclusive and xsd:maxInclusive facets of
+   `restriction` are checked in that order.  The first violated facet is
+   described on stderr and ends the check; every facet that is present and
+   satisfied increments `n_restrictions`.
+
+   @param type Datatype being checked, used for bound comparison and messages.
+   @return False if a facet is violated, true otherwise.
+*/
+static bool
+check_restriction(SerdModel*      model,
+                  const URIs*     uris,
+                  const SerdNode* literal,
+                  const SerdNode* type,
+                  const SerdNode* restriction)
+{
+    const char* const text = serd_node_get_string(literal);
+
+    // xsd:pattern
+    const SerdNode* const pattern =
+        serd_model_get(model, restriction, uris->xsd_pattern, NULL, NULL);
+    if (pattern) {
+        const char* const regex = serd_node_get_string(pattern);
+        if (!regexp_match(regex, text)) {
+            fprintf(stderr,
+                    "`%s' does not match <%s> pattern `%s'\n",
+                    text,
+                    serd_node_get_string(type),
+                    regex);
+            return false;
+        }
+        n_restrictions += 1;
+    }
+
+    // xsd:minInclusive
+    const SerdNode* const minimum =
+        serd_model_get(model, restriction, uris->xsd_minInclusive, NULL, NULL);
+    if (minimum) {
+        if (bound_cmp(model, uris, literal, type, minimum) < 0) {
+            fprintf(stderr,
+                    "`%s' is not >= <%s> minimum `%s'\n",
+                    text,
+                    serd_node_get_string(type),
+                    serd_node_get_string(minimum));
+            return false;
+        }
+        n_restrictions += 1;
+    }
+
+    // xsd:maxInclusive
+    const SerdNode* const maximum =
+        serd_model_get(model, restriction, uris->xsd_maxInclusive, NULL, NULL);
+    if (maximum) {
+        if (bound_cmp(model, uris, literal, type, maximum) > 0) {
+            fprintf(stderr,
+                    "`%s' is not <= <%s> maximum `%s'\n",
+                    text,
+                    serd_node_get_string(type),
+                    serd_node_get_string(maximum));
+            return false;
+        }
+        n_restrictions += 1;
+    }
+
+    // No known facet was violated; facets of any other kind are not looked at
+    return true;
+}
+
+/**
+   Check that `literal` is a valid value of datatype `type`.
+
+   The literal's own datatype must be related to `type`, and the literal must
+   satisfy every restriction in the owl:withRestrictions list of `type`.  If
+   `type` has such a list and an owl:onDatatype supertype, the literal is then
+   checked against that supertype in the same way.
+
+   @param statement Statement the literal occurs in, used for error reports.
+   @param type Required datatype; if NULL there is nothing to check.
+   @return True if the literal is acceptable.
+*/
+static bool
+literal_is_valid(SerdModel*           model,
+                 const URIs*          uris,
+                 const SerdStatement* statement,
+                 const SerdNode*      literal,
+                 const SerdNode*      type)
+{
+    if (!type) {
+        return true;
+    }
+
+    /* Check that literal data is related to required type.  We don't do a
+       strict subtype check here because e.g. an xsd:decimal might be a valid
+       xsd:unsignedInt, which the pattern checks will verify, but if the
+       literal type is not related to the required type at all
+       (e.g. xsd:decimal and xsd:string) there is a problem. */
+    const SerdNode* datatype = serd_node_get_datatype(literal);
+    if (datatype && datatype != type) {
+        const SerdNode* const via = uris->owl_onDatatype;
+        const bool            related =
+            is_descendant_of(model, uris, datatype, type, via) ||
+            is_descendant_of(model, uris, type, datatype, via) ||
+            (serd_node_equals(datatype, uris->xsd_decimal) &&
+             is_descendant_of(model, uris, type, uris->xsd_double, via));
+        if (!related) {
+            /* No trailing newline here: errorf() appends the separator and
+               the statement itself, and a newline would break the single
+               line output requested with -l. */
+            errorf(statement,
+                   "Literal `%s' datatype <%s> is not compatible with <%s>",
+                   serd_node_get_string(literal),
+                   serd_node_get_string(datatype),
+                   serd_node_get_string(type));
+            return false;
+        }
+    }
+
+    // Find restrictions list
+    const SerdNode* head =
+        serd_model_get(model, type, uris->owl_withRestrictions, NULL, NULL);
+    if (!head) {
+        return true; // No restrictions
+    }
+
+    // Walk list, checking each restriction
+    while (head) {
+        SerdIter* f = serd_model_find(model, head, uris->rdf_first, NULL, NULL);
+        if (!f) {
+            break; // Reached end of restrictions list without failure
+        }
+
+        // Check this restriction
+        const bool good =
+            check_restriction(model,
+                              uris,
+                              literal,
+                              type,
+                              serd_statement_get_object(serd_iter_get(f)));
+        serd_iter_free(f);
+
+        if (!good) {
+            return false; // Failed, literal is invalid
+        }
+
+        // Seek to next list node
+        SerdIter* n = serd_model_find(model, head, uris->rdf_rest, NULL, NULL);
+        head = n ? serd_statement_get_object(serd_iter_get(n)) : NULL;
+        serd_iter_free(n);
+    }
+
+    // Match iff the literal also matches the supertype, if there is one
+    const SerdNode* super =
+        serd_model_get(model, type, uris->owl_onDatatype, NULL, NULL);
+    if (super) {
+        return literal_is_valid(model, uris, statement, literal, super);
+    }
+
+    return true; // Matches top level type
+}
+
+/**
+   Check that the resource `node` may be an instance of the class `klass`.
+
+   Fails (reporting through errorf()) if `node` is a literal, or if `klass` is
+   a literal class or datatype.  If `klass` is an owl:Restriction, `node` must
+   satisfy it, and the same checks are applied for every rdfs:subClassOf
+   superclass of `klass`.
+
+   @param statement Statement being validated, used for error reports.
+   @return True if no check failed.
+*/
+static bool
+check_resource_type(SerdModel*           model,
+                    const URIs*          uris,
+                    const SerdStatement* statement,
+                    const SerdNode*      node,
+                    const SerdNode*      klass)
+{
+    if (serd_node_get_type(node) == SERD_LITERAL) {
+        errorf(statement, "Literal found where class instance expected");
+        return false;
+    } else if (is_descendant_of(model,
+                                uris,
+                                klass,
+                                uris->rdfs_Literal,
+                                uris->rdfs_subClassOf) ||
+               serd_model_ask(
+                   model, klass, uris->rdf_type, uris->rdfs_Datatype, NULL)) {
+        errorf(statement, "Resource found where literal expected");
+        return false;
+    }
+
+    if (serd_model_ask(
+            model, klass, uris->rdf_type, uris->owl_Restriction, NULL)) {
+        if (check_instance(model, uris, klass, statement, node)) {
+            return false;
+        }
+    }
+
+    // Check all superclasses, stopping at the first failure
+    bool      ok = true;
+    SerdIter* r =
+        serd_model_find(model, klass, uris->rdfs_subClassOf, NULL, NULL);
+    SERD_FOREACH (s, r) {
+        const SerdNode* super = serd_statement_get_object(s);
+        if (!check_resource_type(model, uris, statement, node, super)) {
+            ok = false;
+            break;
+        }
+    }
+    serd_iter_free(r); // Released on every path, including early failure
+
+    return ok;
+}
+
+/**
+   Check that `node` is acceptable where a value of `type` is required.
+
+   rdfs:Resource and owl:Thing accept anything.  Literals are checked as
+   literals, URIs are accepted outright for foaf:Document and for types
+   derived from xsd:anyURI, and every other case is handled by
+   check_resource_type().
+
+   @param statement Statement being validated, used for error reports.
+   @return True if `node` is acceptable.
+*/
+static bool
+check_type(SerdModel*           model,
+           const URIs*          uris,
+           const SerdStatement* statement,
+           const SerdNode*      node,
+           const SerdNode*      type)
+{
+    // The universal classes accept any node at all
+    if (serd_node_equals(type, uris->rdfs_Resource) ||
+        serd_node_equals(type, uris->owl_Thing)) {
+        return true;
+    }
+
+    if (serd_node_get_type(node) == SERD_LITERAL) {
+        if (serd_node_equals(type, uris->rdfs_Literal)) {
+            return true;
+        }
+        if (serd_node_equals(type, uris->rdf_PlainLiteral)) {
+            return !serd_node_get_language(node);
+        }
+        return literal_is_valid(model, uris, statement, node, type);
+    }
+
+    if (serd_node_get_type(node) == SERD_URI) {
+        // Any URI is taken to be a document (questionable...)
+        if (serd_node_equals(type, uris->foaf_Document)) {
+            return true;
+        }
+
+        /* Type is any URI and this is a URI, so pass.  Restrictions on anyURI
+           subtypes are not currently checked (very uncommon). */
+        if (is_descendant_of(
+                model, uris, type, uris->xsd_anyURI, uris->owl_onDatatype)) {
+            return true;
+        }
+    }
+
+    // URIs and all remaining nodes (blanks often lack explicit types)
+    return check_resource_type(model, uris, statement, node, type);
+}
+
+/**
+   Count the statements remaining in `i` whose `field` node is not blank.
+
+   The iterator is advanced by the loop but not freed.
+*/
+static uint64_t
+count_non_blanks(SerdIter* i, SerdField field)
+{
+    uint64_t count = 0;
+    SERD_FOREACH (statement, i) {
+        const bool is_blank =
+            serd_node_get_type(serd_statement_get_node(statement, field)) ==
+            SERD_BLANK;
+        if (!is_blank) {
+            count += 1;
+        }
+    }
+    return count;
+}
+
+/**
+   Check every statement in `model` against the definition of its predicate.
+
+   For each statement this reports (through errorf()): use of a predicate that
+   is not typed as a kind of rdf:Property, predicates without an rdfs:label,
+   literal/resource mismatches for datatype and object properties, violated
+   (inverse) functional properties, rdf:type objects that are not classes,
+   literals that do not match their datatype, and objects or subjects outside
+   every declared rdfs:range or rdfs:domain of the predicate.
+
+   @return 0 if no error was reported, non-zero otherwise.
+*/
+static int
+check_properties(SerdModel* model, URIs* uris)
+{
+    int       st = 0;
+    SerdIter* i  = serd_model_begin(model);
+    SERD_FOREACH (statement, i) {
+        const SerdNode* subj = serd_statement_get_subject(statement);
+        const SerdNode* pred = serd_statement_get_predicate(statement);
+        const SerdNode* obj  = serd_statement_get_object(statement);
+
+        // The predicate must have a type that descends from rdf:Property
+        bool      is_any_property = false;
+        SerdIter* t =
+            serd_model_find(model, pred, uris->rdf_type, NULL, NULL);
+        SERD_FOREACH (s, t) {
+            if (is_descendant_of(model,
+                                 uris,
+                                 serd_statement_get_object(s),
+                                 uris->rdf_Property,
+                                 uris->rdfs_subClassOf)) {
+                is_any_property = true;
+                break;
+            }
+        }
+        serd_iter_free(t);
+
+        const bool is_ObjectProperty = serd_model_ask(
+            model, pred, uris->rdf_type, uris->owl_ObjectProperty, NULL);
+        const bool is_FunctionalProperty = serd_model_ask(
+            model, pred, uris->rdf_type, uris->owl_FunctionalProperty, NULL);
+        const bool is_InverseFunctionalProperty =
+            serd_model_ask(model,
+                           pred,
+                           uris->rdf_type,
+                           uris->owl_InverseFunctionalProperty,
+                           NULL);
+        const bool is_DatatypeProperty = serd_model_ask(
+            model, pred, uris->rdf_type, uris->owl_DatatypeProperty, NULL);
+
+        if (!is_any_property) {
+            st = errorf(statement, "Use of undefined property");
+        }
+
+        if (!serd_model_ask(model, pred, uris->rdfs_label, NULL, NULL)) {
+            st = errorf(statement,
+                        "Property <%s> has no label",
+                        serd_node_get_string(pred));
+        }
+
+        if (is_DatatypeProperty && serd_node_get_type(obj) != SERD_LITERAL) {
+            st = errorf(statement, "Datatype property with non-literal value");
+        }
+
+        if (is_ObjectProperty && serd_node_get_type(obj) == SERD_LITERAL) {
+            st = errorf(statement, "Object property with literal value");
+        }
+
+        // The counts are 64-bit, so print them with a matching conversion
+        if (is_FunctionalProperty) {
+            SerdIter* o = serd_model_find(model, subj, pred, NULL, NULL);
+            const uint64_t n = count_non_blanks(o, SERD_OBJECT);
+            if (n > 1) {
+                st = errorf(statement,
+                            "Functional property with %llu objects",
+                            (unsigned long long)n);
+            }
+            serd_iter_free(o);
+        }
+
+        if (is_InverseFunctionalProperty) {
+            SerdIter* f = serd_model_find(model, NULL, pred, obj, NULL);
+            const uint64_t n = count_non_blanks(f, SERD_SUBJECT);
+            if (n > 1) {
+                st = errorf(statement,
+                            "Inverse functional property with %llu subjects",
+                            (unsigned long long)n);
+            }
+            serd_iter_free(f);
+        }
+
+        if (serd_node_equals(pred, uris->rdf_type) &&
+            !serd_model_ask(
+                model, obj, uris->rdf_type, uris->rdfs_Class, NULL) &&
+            !serd_model_ask(
+                model, obj, uris->rdf_type, uris->owl_Class, NULL)) {
+            st = errorf(statement, "Type is not a rdfs:Class or owl:Class");
+        }
+
+        if (serd_node_get_type(obj) == SERD_LITERAL &&
+            !literal_is_valid(
+                model, uris, statement, obj, serd_node_get_datatype(obj))) {
+            st = errorf(statement, "Literal does not match datatype");
+        }
+
+        // The object must be in every declared range of the predicate
+        SerdIter* r =
+            serd_model_find(model, pred, uris->rdfs_range, NULL, NULL);
+        SERD_FOREACH (s, r) {
+            const SerdNode* range = serd_statement_get_object(s);
+            if (!check_type(model, uris, statement, obj, range)) {
+                st = errorf(statement,
+                            "Object not in range <%s>",
+                            serd_node_get_string(range));
+            }
+        }
+        serd_iter_free(r);
+
+        // Likewise the subject must be in every declared domain, not just
+        // the first one found
+        SerdIter* d =
+            serd_model_find(model, pred, uris->rdfs_domain, NULL, NULL);
+        SERD_FOREACH (s, d) {
+            const SerdNode* domain = serd_statement_get_object(s);
+            if (!check_type(model, uris, statement, subj, domain)) {
+                st = errorf(statement,
+                            "Subject not in domain <%s>",
+                            serd_node_get_string(domain));
+            }
+        }
+        serd_iter_free(d);
+    }
+    serd_iter_free(i);
+
+    return st;
+}
+
+/**
+   Check that `instance` satisfies the owl:Restriction `restriction`.
+
+   The number of values `instance` has for the restricted property is checked
+   against owl:cardinality, owl:minCardinality and owl:maxCardinality, and if
+   owl:someValuesFrom is given, at least one value must have that type.
+   Restrictions without owl:onProperty are ignored.
+
+   @param statement Statement used to describe the location of any error.
+   @return 0 if the restriction is satisfied, non-zero if an error was
+   reported.
+*/
+static int
+check_instance(SerdModel*           model,
+               const URIs*          uris,
+               const SerdNode*      restriction,
+               const SerdStatement* statement,
+               const SerdNode*      instance)
+{
+    int             st   = 0;
+    const SerdNode* prop = serd_model_get(
+        model, restriction, uris->owl_onProperty, NULL, NULL);
+    if (!prop) {
+        return 0;
+    }
+
+    const unsigned values =
+        serd_model_count(model, instance, prop, NULL, NULL);
+
+    // Check exact cardinality
+    const SerdNode* card = serd_model_get(
+        model, restriction, uris->owl_cardinality, NULL, NULL);
+    if (card) {
+        const unsigned c = atoi(serd_node_get_string(card));
+        if (values != c) {
+            st = errorf(statement,
+                        "Property %s on %s has %u != %u values",
+                        serd_node_get_string(prop),
+                        serd_node_get_string(instance),
+                        values,
+                        c);
+        }
+    }
+
+    // Check minimum cardinality
+    const SerdNode* minCard = serd_model_get(
+        model, restriction, uris->owl_minCardinality, NULL, NULL);
+    if (minCard) {
+        const unsigned m = atoi(serd_node_get_string(minCard));
+        if (values < m) {
+            st = errorf(statement,
+                        "Property %s on %s has %u < %u values",
+                        serd_node_get_string(prop),
+                        serd_node_get_string(instance),
+                        values,
+                        m);
+        }
+    }
+
+    // Check maximum cardinality: it is violated by MORE than m values
+    const SerdNode* maxCard = serd_model_get(
+        model, restriction, uris->owl_maxCardinality, NULL, NULL);
+    if (maxCard) {
+        const unsigned m = atoi(serd_node_get_string(maxCard));
+        if (values > m) {
+            st = errorf(statement,
+                        "Property %s on %s has %u > %u values",
+                        serd_node_get_string(prop),
+                        serd_node_get_string(instance),
+                        values,
+                        m);
+        }
+    }
+
+    // Check someValuesFrom: at least one value must have the given type
+    const SerdNode* type = serd_model_get(
+        model, restriction, uris->owl_someValuesFrom, NULL, NULL);
+    if (type) {
+        SerdIter* v = serd_model_find(model, instance, prop, NULL, NULL);
+        bool      found = false;
+        SERD_FOREACH (s, v) {
+            const SerdNode* value = serd_statement_get_object(s);
+            if (check_type(model, uris, statement, value, type)) {
+                found = true;
+                break;
+            }
+        }
+        if (!found) {
+            // No trailing newline: errorf() appends the statement itself
+            st = errorf(statement,
+                        "%s has no <%s> values of type <%s>",
+                        serd_node_get_string(instance),
+                        serd_node_get_string(prop),
+                        serd_node_get_string(type));
+        }
+        serd_iter_free(v);
+    }
+
+    return st;
+}
+
+/**
+   Check all instances of `klass` and of its subclasses against `restriction`.
+
+   Instances are the subjects of rdf:type statements with `klass` as object;
+   subclasses are found through rdfs:subClassOf and checked recursively.
+
+   @return 0 if every instance satisfies the restriction, 1 if any check
+   reported an error.
+*/
+static int
+check_class_instances(SerdModel*      model,
+                      const URIs*     uris,
+                      const SerdNode* restriction,
+                      const SerdNode* klass)
+{
+    int st = 0;
+
+    // Check immediate instances of this class
+    SerdIter* i = serd_model_find(model, NULL, uris->rdf_type, klass, NULL);
+    SERD_FOREACH (s, i) {
+        if (check_instance(
+                model, uris, restriction, s, serd_statement_get_subject(s))) {
+            st = 1; // Remember the failure, but keep checking the rest
+        }
+    }
+    serd_iter_free(i);
+
+    // Check instances of all subclasses recursively
+    SerdIter* c =
+        serd_model_find(model, NULL, uris->rdfs_subClassOf, klass, NULL);
+    SERD_FOREACH (statement, c) {
+        const SerdNode* subklass = serd_statement_get_subject(statement);
+        if (check_class_instances(model, uris, restriction, subklass)) {
+            st = 1;
+        }
+    }
+    serd_iter_free(c);
+
+    return st;
+}
+
+/**
+   Check the instances of every class that is restricted by an owl:Restriction.
+
+   For each restriction with an owl:onProperty, the instances of every class
+   declared rdfs:subClassOf that restriction are checked against it.
+
+   @return 0 if no error was reported during these checks, 1 otherwise.
+*/
+static int
+check_instances(SerdModel* model, const URIs* uris)
+{
+    /* Every violation is reported through errorf(), which counts it in
+       n_errors, so the change in that counter tells whether anything
+       failed. */
+    const int errors_before = n_errors;
+
+    SerdIter* r = serd_model_find(
+        model, NULL, uris->rdf_type, uris->owl_Restriction, NULL);
+    SERD_FOREACH (s, r) {
+        const SerdNode* restriction = serd_statement_get_subject(s);
+        const SerdNode* prop        = serd_model_get(
+            model, restriction, uris->owl_onProperty, NULL, NULL);
+        if (!prop) {
+            continue; // Nothing to check without a restricted property
+        }
+
+        SerdIter* c = serd_model_find(
+            model, NULL, uris->rdfs_subClassOf, restriction, NULL);
+        SERD_FOREACH (t, c) {
+            const SerdNode* klass = serd_statement_get_subject(t);
+            check_class_instances(model, uris, restriction, klass);
+        }
+        serd_iter_free(c);
+    }
+    serd_iter_free(r);
+
+    return (n_errors > errors_before) ? 1 : 0;
+}
+
+/**
+   Complain on stderr that option `opt` lacks its argument, then print the
+   usage as an error.
+
+   @return The error status from print_usage().
+*/
+static int
+missing_arg(const char* name, char opt)
+{
+    fprintf(stderr,
+            "serd_validate: option requires an argument -- '%c'\n",
+            opt);
+
+    const int status = print_usage(name, true);
+    return status;
+}
+
+/**
+   Program entry point: parse options, read every input file into one model,
+   run the property and instance checks, and print a summary.
+
+   @return 0 if all inputs were read and no validation error was found,
+   non-zero otherwise (including usage errors).
+*/
+int
+main(int argc, char** argv)
+{
+    if (argc < 2) {
+        return print_usage(argv[0], true);
+    }
+
+    int  a          = 1;
+    long stack_size = 4194304;
+    for (; a < argc && argv[a][0] == '-'; ++a) {
+        if (argv[a][1] == 'h') {
+            return print_usage(argv[0], false);
+        } else if (argv[a][1] == 'k') {
+            if (++a == argc) {
+                return missing_arg(argv[0], 'k');
+            }
+            stack_size = strtol(argv[a], NULL, 10);
+            if (stack_size <= 0 || stack_size == LONG_MAX) {
+                ERRORF("stack size `%ld' out of range\n", stack_size);
+                return 1;
+            }
+        } else if (argv[a][1] == 'l') {
+            one_line_errors = true;
+        } else if (argv[a][1] == 'v') {
+            return print_version();
+        } else {
+            fprintf(stderr, "%s: Unknown option `%s'\n", argv[0], argv[a]);
+            return print_usage(argv[0], true);
+        }
+    }
+
+    if (a == argc) {
+        // Only options were given, so there is nothing to validate
+        fprintf(stderr, "%s: Missing input\n", argv[0]);
+        return print_usage(argv[0], true);
+    }
+
+    SerdWorld*    world    = serd_world_new();
+    SerdModel*    model    = serd_model_new(world, SERD_SPO | SERD_OPS, false);
+    SerdEnv*      env      = serd_env_new(NULL);
+    SerdInserter* inserter = serd_inserter_new(model, env, NULL);
+
+    const SerdSinkInterface* sink = serd_inserter_get_sink_interface(inserter);
+
+    // FIXME: syntax
+    SerdReader* reader = serd_reader_new(world, SERD_TURTLE, sink, stack_size);
+
+    int  n_files    = 0;     // Inputs actually handed to the reader
+    bool read_error = false; // Whether reading any of them failed
+    for (; a < argc; ++a) {
+        const char* input   = argv[a];
+        char*       in_path = absolute_path(input);
+
+        if (!in_path) {
+            fprintf(stderr, "Skipping file %s\n", input);
+            continue;
+        }
+
+        SerdNode* base_uri_node = serd_node_new_file_uri(in_path, NULL, true);
+
+        serd_env_set_base_uri(env, base_uri_node);
+
+        /* Keep the first failure: a later successful call must not hide an
+           earlier error.  The stream is always ended, as before. */
+        SerdStatus st = serd_reader_start_file(reader, input, true);
+        if (!st) {
+            st = serd_reader_read_document(reader);
+        }
+        const SerdStatus end_st = serd_reader_end_stream(reader);
+        if (!st) {
+            st = end_st;
+        }
+
+        if (st) {
+            fprintf(stderr,
+                    "error reading %s: %s\n",
+                    in_path,
+                    serd_strerror(st));
+            read_error = true;
+        }
+
+        ++n_files;
+        serd_node_free(base_uri_node);
+        free(in_path);
+    }
+    serd_reader_free(reader);
+    serd_env_free(env);
+
+#define URI(prefix, suffix) \
+    uris.prefix##_##suffix = serd_node_new_uri(NS_##prefix #suffix)
+
+    URIs uris;
+    URI(foaf, Document);
+    URI(owl, AnnotationProperty);
+    URI(owl, Class);
+    URI(owl, DatatypeProperty);
+    URI(owl, FunctionalProperty);
+    URI(owl, InverseFunctionalProperty);
+    URI(owl, ObjectProperty);
+    URI(owl, OntologyProperty);
+    URI(owl, Restriction);
+    URI(owl, Thing);
+    URI(owl, cardinality);
+    URI(owl, equivalentClass);
+    URI(owl, maxCardinality);
+    URI(owl, minCardinality);
+    URI(owl, onDatatype);
+    URI(owl, onProperty);
+    URI(owl, someValuesFrom);
+    URI(owl, withRestrictions);
+    URI(rdf, PlainLiteral);
+    URI(rdf, Property);
+    URI(rdf, first);
+    URI(rdf, rest);
+    URI(rdf, type);
+    URI(rdfs, Class);
+    URI(rdfs, Datatype);
+    URI(rdfs, Literal);
+    URI(rdfs, Resource);
+    URI(rdfs, domain);
+    URI(rdfs, label);
+    URI(rdfs, range);
+    URI(rdfs, subClassOf);
+    URI(xsd, anyURI);
+    URI(xsd, decimal);
+    URI(xsd, double);
+    URI(xsd, maxInclusive);
+    URI(xsd, minInclusive);
+    URI(xsd, pattern);
+    URI(xsd, string);
+
+#ifndef HAVE_PCRE
+    fprintf(stderr, "warning: Built without PCRE, datatypes not checked.\n");
+#endif
+
+    const int prop_st = check_properties(model, &uris);
+    const int inst_st = check_instances(model, &uris);
+
+    // Report the number of inputs read, not the number of arguments
+    printf("Found %d errors among %d files (checked %d restrictions)\n",
+           n_errors,
+           n_files,
+           n_restrictions);
+
+    serd_model_free(model);
+    serd_world_free(world);
+    return (prop_st || inst_st || read_error) ? 1 : 0;
+}
diff --git a/wscript b/wscript
index fedc2bc4..9f67a942 100644
--- a/wscript
+++ b/wscript
@@ -67,6 +67,19 @@ def configure(conf):
defines = ['_POSIX_C_SOURCE=200809L'],
mandatory = False)
+ autowaf.check_pkg(conf, 'libpcre', uselib_store='PCRE', mandatory=False)
+ if conf.env.HAVE_PCRE:
+ if conf.check(cflags=['-pthread'], mandatory=False):
+ conf.env.PTHREAD_CFLAGS = ['-pthread']
+ if conf.env.CC_NAME != 'clang':
+ conf.env.PTHREAD_LINKFLAGS = ['-pthread']
+ elif conf.check(linkflags=['-lpthread'], mandatory=False):
+ conf.env.PTHREAD_CFLAGS = []
+ conf.env.PTHREAD_LINKFLAGS = ['-lpthread']
+ else:
+ conf.env.PTHREAD_CFLAGS = []
+ conf.env.PTHREAD_LINKFLAGS = []
+
dump = Options.options.dump.split(',')
if 'all' in dump or 'iter' in dump:
conf.define('SERD_DEBUG_ITER', 1)
@@ -176,18 +189,27 @@ def build(bld):
# Utilities
if bld.env.BUILD_UTILS:
- obj = bld(features = 'c cprogram',
- source = 'src/serdi.c',
- target = 'serdi',
- includes = ['.', './src'],
- use = 'libserd',
- lib = lib_args['lib'],
- install_path = '${BINDIR}')
- if not bld.env.BUILD_SHARED or bld.env.STATIC_PROGS:
- obj.use = 'libserd_static'
- if bld.env.STATIC_PROGS:
- obj.env.SHLIB_MARKER = obj.env.STLIB_MARKER
- obj.linkflags = ['-static']
+ utils = ['serdi']
+ if bld.env.HAVE_PCRE:
+ utils += ['serd_validate']
+
+ for i in utils:
+ obj = bld(features = 'c cprogram',
+ source = 'src/%s.c' % i,
+ target = i,
+ includes = ['.', './src'],
+ use = 'libserd',
+ lib = lib_args['lib'],
+ install_path = '${BINDIR}')
+ if not bld.env.BUILD_SHARED or bld.env.STATIC_PROGS:
+ obj.use = 'libserd_static'
+ if bld.env.STATIC_PROGS:
+ obj.env.SHLIB_MARKER = obj.env.STLIB_MARKER
+ obj.linkflags = ['-static']
+ if i == 'serd_validate':
+ autowaf.use_lib(bld, obj, 'PCRE')
+ obj.cflags = bld.env.PTHREAD_CFLAGS
+ obj.linkflags = bld.env.PTHREAD_LINKFLAGS
# Documentation
autowaf.build_dox(bld, 'SERD', SERD_VERSION, top, out)