diff options
author | David Robillard <d@drobilla.net> | 2018-05-27 15:48:25 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2018-05-27 21:10:21 +0200 |
commit | 7f75e996c630e96142b842be5f5e193a5e93e634 (patch) | |
tree | 60ddd2c8594b38cabd441ad54397313a88a4abd4 | |
parent | 7a4582a570657cee6e62842a98a0c7be3da7cb03 (diff) | |
download | serd-model.tar.gz serd-model.tar.bz2 serd-model.zip |
WIP: Add serd_validate (ref: model)
-rw-r--r-- | src/serd_validate.c | 884 | ||||
-rw-r--r-- | wscript | 46 |
2 files changed, 918 insertions, 12 deletions
diff --git a/src/serd_validate.c b/src/serd_validate.c new file mode 100644 index 00000000..a190f99d --- /dev/null +++ b/src/serd_validate.c @@ -0,0 +1,884 @@ +/* + Copyright 2012-2018 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#define _BSD_SOURCE 1 // for realpath +#define _DEFAULT_SOURCE 1 // for realpath + +#include "serd/serd.h" +#include "serd/serd.h" +#include "serd_config.h" + +#include <assert.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _WIN32 +#include <windows.h> +#endif + +#ifdef HAVE_PCRE +#include <pcre.h> +#endif + +#define USTR(s) ((const uint8_t*)(s)) + +#define NS_foaf "http://xmlns.com/foaf/0.1/" +#define NS_owl "http://www.w3.org/2002/07/owl#" +#define NS_rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define NS_rdfs "http://www.w3.org/2000/01/rdf-schema#" +#define NS_xsd "http://www.w3.org/2001/XMLSchema#" + +#define ERROR(msg) fprintf(stderr, "serd_validate: " msg); +#define ERRORF(fmt, ...) 
fprintf(stderr, "serd_validate: " fmt, __VA_ARGS__); + +#define SERD_FOREACH(name, iter) \ + for (const SerdStatement* name = NULL; \ + !serd_iter_end(iter) && (name = serd_iter_get(iter)); \ + serd_iter_next(iter)) + +typedef struct +{ + SerdNode* foaf_Document; + SerdNode* owl_AnnotationProperty; + SerdNode* owl_Class; + SerdNode* owl_DatatypeProperty; + SerdNode* owl_FunctionalProperty; + SerdNode* owl_InverseFunctionalProperty; + SerdNode* owl_ObjectProperty; + SerdNode* owl_OntologyProperty; + SerdNode* owl_Restriction; + SerdNode* owl_Thing; + SerdNode* owl_cardinality; + SerdNode* owl_equivalentClass; + SerdNode* owl_maxCardinality; + SerdNode* owl_minCardinality; + SerdNode* owl_onDatatype; + SerdNode* owl_onProperty; + SerdNode* owl_someValuesFrom; + SerdNode* owl_withRestrictions; + SerdNode* rdf_PlainLiteral; + SerdNode* rdf_Property; + SerdNode* rdf_first; + SerdNode* rdf_rest; + SerdNode* rdf_type; + SerdNode* rdfs_Class; + SerdNode* rdfs_Datatype; + SerdNode* rdfs_Literal; + SerdNode* rdfs_Resource; + SerdNode* rdfs_domain; + SerdNode* rdfs_label; + SerdNode* rdfs_range; + SerdNode* rdfs_subClassOf; + SerdNode* xsd_anyURI; + SerdNode* xsd_decimal; + SerdNode* xsd_double; + SerdNode* xsd_maxInclusive; + SerdNode* xsd_minInclusive; + SerdNode* xsd_pattern; + SerdNode* xsd_string; +} URIs; + +int n_errors = 0; +int n_restrictions = 0; +bool one_line_errors = false; + +static int +check_instance(SerdModel* model, + const URIs* uris, + const SerdNode* restriction, + const SerdStatement* statement, + const SerdNode* instance); + +static int +print_version(void) +{ + printf("serd_validate " SERD_VERSION + " <http://drobilla.net/software/serd>\n"); + printf("Copyright 2012-2018 David Robillard <http://drobilla.net>.\n" + "License: <http://www.opensource.org/licenses/isc>\n" + "This is free software; you are free to change and redistribute it." 
+ "\nThere is NO WARRANTY, to the extent permitted by law.\n"); + return 0; +} + +static int +print_usage(const char* name, bool error) +{ + FILE* const os = error ? stderr : stdout; + fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); + fprintf(os, "Validate RDF data\n\n"); + fprintf(os, " -h Display this help and exit\n"); + fprintf(os, " -l Print errors on a single line.\n"); + fprintf(os, " -v Display version information and exit\n"); + fprintf(os, + "Validate RDF data. This is a simple validator which checks\n" + "that all used properties are actually defined. It does not do\n" + "any fancy file retrieval, the files passed on the command line\n" + "are the only data that is read. In other words, you must pass\n" + "the definition of all vocabularies used on the command line.\n"); + return error ? 1 : 0; +} + +static char* +absolute_path(const char* path) +{ +#ifdef _WIN32 + char* out = (char*)malloc(MAX_PATH); + GetFullPathName(path, MAX_PATH, out, NULL); + return out; +#else + return realpath(path, NULL); +#endif +} + +static int +errorf(const SerdStatement* statement, const char* fmt, ...) +{ + va_list args; + va_start(args, fmt); + fprintf(stderr, "error: "); + vfprintf(stderr, fmt, args); + va_end(args); + + const char* sep = one_line_errors ? 
"\t" : "\n "; + fprintf(stderr, + "%s%s%s%s%s%s\n", + sep, + serd_node_get_string(serd_statement_get_subject(statement)), + sep, + serd_node_get_string(serd_statement_get_predicate(statement)), + sep, + serd_node_get_string(serd_statement_get_object(statement))); + + ++n_errors; + return 1; +} + +static bool +is_descendant_of(SerdModel* model, + const URIs* uris, + const SerdNode* child, + const SerdNode* parent, + const SerdNode* pred) +{ + if (!child) { + return false; + } else if (serd_node_equals(child, parent) || + serd_model_ask( + model, child, uris->owl_equivalentClass, parent, NULL)) { + return true; + } + + SerdIter* i = serd_model_find(model, child, pred, NULL, NULL); + SERD_FOREACH (s, i) { + const SerdNode* o = serd_statement_get_object(s); + if (serd_node_equals(child, o)) { + continue; // Weird class is explicitly a descendent of itself + } + if (is_descendant_of(model, uris, o, parent, pred)) { + serd_iter_free(i); + return true; + } + } + serd_iter_free(i); + + return false; +} + +static bool +regexp_match(const char* pat, const char* str) +{ +#ifdef HAVE_PCRE + // Append a $ to the pattern so we only match if the entire string matches + const size_t len = strlen(pat); + char* const regx = (char*)malloc(len + 2); + memcpy(regx, pat, len); + regx[len] = '$'; + regx[len + 1] = '\0'; + + const char* err; + int erroffset; + pcre* re = pcre_compile(regx, PCRE_ANCHORED, &err, &erroffset, NULL); + free(regx); + if (!re) { + fprintf(stderr, + "Error in pattern `%s' at offset %d (%s)\n", + pat, + erroffset, + err); + return false; + } + + const bool ret = pcre_exec(re, NULL, str, strlen(str), 0, 0, NULL, 0) >= 0; + pcre_free(re); + return ret; +#endif // HAVE_PCRE + return true; +} + +static int +bound_cmp(SerdModel* model, + const URIs* uris, + const SerdNode* literal, + const SerdNode* type, + const SerdNode* bound) +{ + const char* str = serd_node_get_string(literal); + const char* bound_str = serd_node_get_string(bound); + const SerdNode* pred = 
uris->owl_onDatatype; + const bool is_numeric = + is_descendant_of(model, uris, type, uris->xsd_decimal, pred) || + is_descendant_of(model, uris, type, uris->xsd_double, pred); + + if (is_numeric) { + const double fbound = serd_strtod(bound_str, NULL); + const double fliteral = serd_strtod(str, NULL); + return ((fliteral < fbound) ? -1 : (fliteral > fbound) ? 1 : 0); + } else { + return strcmp(str, bound_str); + } +} + +static bool +check_restriction(SerdModel* model, + const URIs* uris, + const SerdNode* literal, + const SerdNode* type, + const SerdNode* restriction) +{ + const char* str = serd_node_get_string(literal); + + // Check xsd:pattern + const SerdNode* pat = + serd_model_get(model, restriction, uris->xsd_pattern, 0, 0); + if (pat) { + if (!regexp_match(serd_node_get_string(pat), str)) { + fprintf(stderr, + "`%s' does not match <%s> pattern `%s'\n", + serd_node_get_string(literal), + serd_node_get_string(type), + serd_node_get_string(pat)); + return false; + } + ++n_restrictions; + } + + // Check xsd:minInclusive + const SerdNode* lower = + serd_model_get(model, restriction, uris->xsd_minInclusive, 0, 0); + if (lower) { + if (bound_cmp(model, uris, literal, type, lower) < 0) { + fprintf(stderr, + "`%s' is not >= <%s> minimum `%s'\n", + serd_node_get_string(literal), + serd_node_get_string(type), + serd_node_get_string(lower)); + return false; + } + ++n_restrictions; + } + + // Check xsd:maxInclusive + const SerdNode* upper = + serd_model_get(model, restriction, uris->xsd_maxInclusive, 0, 0); + if (upper) { + if (bound_cmp(model, uris, literal, type, upper) > 0) { + fprintf(stderr, + "`%s' is not <= <%s> maximum `%s'\n", + serd_node_get_string(literal), + serd_node_get_string(type), + serd_node_get_string(upper)); + return false; + } + ++n_restrictions; + } + + return true; // Unknown restriction, be quietly tolerant +} + +static bool +literal_is_valid(SerdModel* model, + const URIs* uris, + const SerdStatement* statement, + const SerdNode* literal, + 
const SerdNode* type) +{ + if (!type) { + return true; + } + + /* Check that literal data is related to required type. We don't do a + strict subtype check here because e.g. an xsd:decimal might be a valid + xsd:unsignedInt, which the pattern checks will verify, but if the + literal type is not related to the required type at all + (e.g. xsd:decimal and xsd:string) there is a problem. */ + const SerdNode* datatype = serd_node_get_datatype(literal); + if (datatype && datatype != type) { + if (!is_descendant_of( + model, uris, datatype, type, uris->owl_onDatatype) && + !is_descendant_of( + model, uris, type, datatype, uris->owl_onDatatype) && + !(serd_node_equals(datatype, uris->xsd_decimal) && + is_descendant_of(model, + uris, + type, + uris->xsd_double, + uris->owl_onDatatype))) { + errorf(statement, + "Literal `%s' datatype <%s> is not compatible with <%s>\n", + serd_node_get_string(literal), + serd_node_get_string(datatype), + serd_node_get_string(type)); + return false; + } + } + + // Find restrictions list + const SerdNode* head = + serd_model_get(model, type, uris->owl_withRestrictions, 0, 0); + if (!head) { + return true; // No restrictions + } + + // Walk list, checking each restriction + while (head) { + SerdIter* f = serd_model_find(model, head, uris->rdf_first, 0, 0); + if (!f) { + break; // Reached end of restrictions list without failure + } + + // Check this restriction + const bool good = + check_restriction(model, + uris, + literal, + type, + serd_statement_get_object(serd_iter_get(f))); + serd_iter_free(f); + + if (!good) { + return false; // Failed, literal is invalid + } + + // Seek to next list node + SerdIter* n = serd_model_find(model, head, uris->rdf_rest, 0, 0); + head = n ? 
serd_statement_get_object(serd_iter_get(n)) : NULL; + serd_iter_free(n); + } + + const SerdNode* super = + serd_model_get(model, type, uris->owl_onDatatype, 0, 0); + if (super) { + const bool good = + literal_is_valid(model, uris, statement, literal, super); + return good; // Match iff literal also matches supertype + } + + return true; // Matches top level type +} + +static bool +check_resource_type(SerdModel* model, + const URIs* uris, + const SerdStatement* statement, + const SerdNode* node, + const SerdNode* klass) +{ + if (serd_node_get_type(node) == SERD_LITERAL) { + errorf(statement, "Literal found where class instance expected"); + return false; + } else if (is_descendant_of(model, + uris, + klass, + uris->rdfs_Literal, + uris->rdfs_subClassOf) || + serd_model_ask( + model, klass, uris->rdf_type, uris->rdfs_Datatype, 0)) { + errorf(statement, "Resource found where literal expected"); + return false; + } + + if (serd_model_ask( + model, klass, uris->rdf_type, uris->owl_Restriction, NULL)) { + if (check_instance(model, uris, klass, statement, node)) { + return false; + } + } + + SerdIter* r = serd_model_find(model, klass, uris->rdfs_subClassOf, NULL, NULL); + SERD_FOREACH (s, r) { + const SerdNode* super = serd_statement_get_object(s); + if (!check_resource_type(model, uris, statement, node, super)) { + return false; + } + } + + return true; +} + +static bool +check_type(SerdModel* model, + const URIs* uris, + const SerdStatement* statement, + const SerdNode* node, + const SerdNode* type) +{ + if (serd_node_equals(type, uris->rdfs_Resource) || + serd_node_equals(type, uris->owl_Thing)) { + return true; + } + + if (serd_node_get_type(node) == SERD_LITERAL) { + if (serd_node_equals(type, uris->rdfs_Literal)) { + return true; + } else if (serd_node_equals(type, uris->rdf_PlainLiteral)) { + return !serd_node_get_language(node); + } else { + return literal_is_valid(model, uris, statement, node, type); + } + } else if (serd_node_get_type(node) == SERD_URI) { + if 
(serd_node_equals(type, uris->foaf_Document)) { + return true; // Questionable... + } else if (is_descendant_of(model, + uris, + type, + uris->xsd_anyURI, + uris->owl_onDatatype)) { + /* Type is any URI and this is a URI, so pass. Restrictions on + anyURI subtypes are not currently checked (very uncommon). */ + return true; // Type is anyURI, and this is a URI + } else { + return check_resource_type(model, uris, statement, node, type); + } + } else { + if (!check_resource_type(model, uris, statement, node, type)) { + return false; + } + + return true; // Blanks often lack explicit types, ignore + } + + return false; +} + +static uint64_t +count_non_blanks(SerdIter* i, SerdField field) +{ + uint64_t n = 0; + SERD_FOREACH (s, i) { + const SerdNode* node = serd_statement_get_node(s, field); + if (serd_node_get_type(node) != SERD_BLANK) { + ++n; + } + } + return n; +} + +static int +check_properties(SerdModel* model, URIs* uris) +{ + int st = 0; + SerdIter* i = serd_model_begin(model); + SERD_FOREACH (statement, i) { + const SerdNode* subj = serd_statement_get_subject(statement); + const SerdNode* pred = serd_statement_get_predicate(statement); + const SerdNode* obj = serd_statement_get_object(statement); + + bool is_any_property = false; + SerdIter* t = serd_model_find(model, pred, uris->rdf_type, NULL, NULL); + SERD_FOREACH (s, t) { + if (is_descendant_of(model, + uris, + serd_statement_get_object(s), + uris->rdf_Property, + uris->rdfs_subClassOf)) { + is_any_property = true; + break; + } + } + serd_iter_free(t); + + const bool is_ObjectProperty = serd_model_ask( + model, pred, uris->rdf_type, uris->owl_ObjectProperty, 0); + const bool is_FunctionalProperty = serd_model_ask( + model, pred, uris->rdf_type, uris->owl_FunctionalProperty, 0); + const bool is_InverseFunctionalProperty = + serd_model_ask(model, + pred, + uris->rdf_type, + uris->owl_InverseFunctionalProperty, + 0); + const bool is_DatatypeProperty = serd_model_ask( + model, pred, uris->rdf_type, 
uris->owl_DatatypeProperty, 0); + + if (!is_any_property) { + st = errorf(statement, "Use of undefined property"); + } + + if (!serd_model_ask(model, pred, uris->rdfs_label, NULL, NULL)) { + st = errorf(statement, + "Property <%s> has no label", + serd_node_get_string(pred)); + } + + if (is_DatatypeProperty && serd_node_get_type(obj) != SERD_LITERAL) { + st = errorf(statement, "Datatype property with non-literal value"); + } + + if (is_ObjectProperty && serd_node_get_type(obj) == SERD_LITERAL) { + st = errorf(statement, "Object property with literal value"); + } + + if (is_FunctionalProperty) { + SerdIter* o = serd_model_find(model, subj, pred, NULL, NULL); + const uint64_t n = count_non_blanks(o, SERD_OBJECT); + if (n > 1) { + st = errorf( + statement, "Functional property with %u objects", n); + } + serd_iter_free(o); + } + + if (is_InverseFunctionalProperty) { + SerdIter* s = serd_model_find(model, NULL, pred, obj, NULL); + const unsigned n = count_non_blanks(s, SERD_SUBJECT); + if (n > 1) { + st = errorf(statement, + "Inverse functional property with %u subjects", + n); + } + serd_iter_free(s); + } + + if (serd_node_equals(pred, uris->rdf_type) && + !serd_model_ask( + model, obj, uris->rdf_type, uris->rdfs_Class, NULL) && + !serd_model_ask( + model, obj, uris->rdf_type, uris->owl_Class, NULL)) { + st = errorf(statement, "Type is not a rdfs:Class or owl:Class"); + } + + if (serd_node_get_type(obj) == SERD_LITERAL && + !literal_is_valid( + model, uris, statement, obj, serd_node_get_datatype(obj))) { + st = errorf(statement, "Literal does not match datatype"); + } + + SerdIter* r = + serd_model_find(model, pred, uris->rdfs_range, NULL, NULL); + SERD_FOREACH (s, r) { + const SerdNode* range = serd_statement_get_object(s); + if (!check_type(model, uris, statement, obj, range)) { + st = errorf(statement, + "Object not in range <%s>", + serd_node_get_string(range)); + } + } + serd_iter_free(r); + + SerdIter* d = + serd_model_find(model, pred, uris->rdfs_domain, NULL, 
NULL); + if (d) { + const SerdNode* domain = + serd_statement_get_object(serd_iter_get(d)); + if (!check_type(model, uris, statement, subj, domain)) { + st = errorf(statement, + "Subject not in domain <%s>", + serd_node_get_string(domain)); + } + serd_iter_free(d); + } + } + serd_iter_free(i); + + return st; +} + +static int +check_instance(SerdModel* model, + const URIs* uris, + const SerdNode* restriction, + const SerdStatement* statement, + const SerdNode* instance) +{ + int st = 0; + const SerdNode* prop = serd_model_get( + model, restriction, uris->owl_onProperty, NULL, NULL); + if (!prop) { + return 0; + } + + const unsigned values = serd_model_count(model, instance, prop, NULL, NULL); + + // Check exact cardinality + const SerdNode* card = serd_model_get( + model, restriction, uris->owl_cardinality, NULL, NULL); + if (card) { + const unsigned c = atoi(serd_node_get_string(card)); + if (values != c) { + st = errorf(statement, + "Property %s on %s has %u != %u values", + serd_node_get_string(prop), + serd_node_get_string(instance), + values, + c); + } + } + + // Check minimum cardinality + const SerdNode* minCard = serd_model_get( + model, restriction, uris->owl_minCardinality, NULL, NULL); + if (minCard) { + const unsigned m = atoi(serd_node_get_string(minCard)); + if (values < m) { + st = errorf(statement, + "Property %s on %s has %u < %u values", + serd_node_get_string(prop), + serd_node_get_string(instance), + values, + m); + } + } + + // Check maximum cardinality + const SerdNode* maxCard = serd_model_get( + model, restriction, uris->owl_maxCardinality, NULL, NULL); + if (maxCard) { + const unsigned m = atoi(serd_node_get_string(maxCard)); + if (values < m) { + st = errorf(statement, + "Property %s on %s has %u > %u values", + serd_node_get_string(prop), + serd_node_get_string(instance), + values, + m); + } + } + + // Check someValuesFrom + const SerdNode* type = serd_model_get( + model, restriction, uris->owl_someValuesFrom, 0, 0); + if (type) { + 
SerdIter* v = serd_model_find(model, instance, prop, NULL, NULL); + bool found = false; + SERD_FOREACH (s, v) { + const SerdNode* value = serd_statement_get_object(s); + if (check_type(model, uris, statement, value, type)) { + found = true; + break; + } + } + if (!found) { + st = errorf(statement, + "%s has no <%s> values of type <%s>\n", + serd_node_get_string(instance), + serd_node_get_string(prop), + serd_node_get_string(type)); + } + serd_iter_free(v); + } + + return st; +} + +static int +check_class_instances(SerdModel* model, + const URIs* uris, + const SerdNode* restriction, + const SerdNode* klass) +{ + // Check immediate instances of this class + SerdIter* i = serd_model_find(model, NULL, uris->rdf_type, klass, NULL); + SERD_FOREACH (s, i) { + check_instance( + model, uris, restriction, s, serd_statement_get_subject(s)); + } + serd_iter_free(i); + + // Check instances of all subclasses recursively + SerdIter* s = + serd_model_find(model, NULL, uris->rdfs_subClassOf, klass, NULL); + SERD_FOREACH (statement, s) { + const SerdNode* subklass = serd_statement_get_subject(statement); + check_class_instances(model, uris, restriction, subklass); + } + serd_iter_free(s); + + return 0; +} + +static int +check_instances(SerdModel* model, const URIs* uris) +{ + int st = 0; + SerdIter* r = serd_model_find( + model, NULL, uris->rdf_type, uris->owl_Restriction, NULL); + SERD_FOREACH (s, r) { + const SerdNode* restriction = serd_statement_get_subject(s); + const SerdNode* prop = serd_model_get( + model, restriction, uris->owl_onProperty, NULL, NULL); + if (!prop) { + continue; + } + + SerdIter* c = serd_model_find( + model, NULL, uris->rdfs_subClassOf, restriction, NULL); + SERD_FOREACH (t, c) { + const SerdNode* klass = serd_statement_get_subject(t); + check_class_instances(model, uris, restriction, klass); + } + serd_iter_free(c); + } + serd_iter_free(r); + + return st; +} + +static int +missing_arg(const char* name, char opt) +{ + ERRORF("option requires an argument -- 
'%c'\n", opt); + return print_usage(name, true); +} + +int +main(int argc, char** argv) +{ + if (argc < 2) { + return print_usage(argv[0], true); + } + + int a = 1; + long stack_size = 4194304; + for (; a < argc && argv[a][0] == '-'; ++a) { + if (argv[a][1] == 'k') { + if (++a == argc) { + return missing_arg(argv[0], 'k'); + } + stack_size = strtol(argv[a], NULL, 10); + if (stack_size <= 0 || stack_size == LONG_MAX) { + ERRORF("stack size `%ld' out of range\n", stack_size); + return 1; + } + } else if (argv[a][1] == 'l') { + one_line_errors = true; + } else if (argv[a][1] == 'v') { + return print_version(); + } else { + fprintf(stderr, "%s: Unknown option `%s'\n", argv[0], argv[a]); + return print_usage(argv[0], true); + } + } + + SerdWorld* world = serd_world_new(); + SerdModel* model = serd_model_new(world, SERD_SPO | SERD_OPS, false); + SerdEnv* env = serd_env_new(NULL); + SerdInserter* inserter = serd_inserter_new(model, env, NULL); + + const SerdSinkInterface* sink = serd_inserter_get_sink_interface(inserter); + + // FIXME: syntax + SerdReader* reader = serd_reader_new(world, SERD_TURTLE, sink, stack_size); + + for (; a < argc; ++a) { + const char* input = argv[a]; + char* in_path = absolute_path(input); + + if (!in_path) { + fprintf(stderr, "Skipping file %s\n", input); + continue; + } + + SerdNode* base_uri_node = serd_node_new_file_uri(in_path, NULL, true); + + serd_env_set_base_uri(env, base_uri_node); + SerdStatus st = serd_reader_start_file(reader, input, true); + st = serd_reader_read_document(reader); + st = serd_reader_end_stream(reader); + + if (st) { + fprintf(stderr, + "error reading %s: %s\n", + in_path, + serd_strerror(st)); + } + + serd_node_free(base_uri_node); + free(in_path); + } + serd_reader_free(reader); + serd_env_free(env); + +#define URI(prefix, suffix) \ + uris.prefix##_##suffix = serd_node_new_uri(NS_##prefix #suffix) + + URIs uris; + URI(foaf, Document); + URI(owl, AnnotationProperty); + URI(owl, Class); + URI(owl, DatatypeProperty); 
+ URI(owl, FunctionalProperty); + URI(owl, InverseFunctionalProperty); + URI(owl, ObjectProperty); + URI(owl, OntologyProperty); + URI(owl, Restriction); + URI(owl, Thing); + URI(owl, cardinality); + URI(owl, equivalentClass); + URI(owl, maxCardinality); + URI(owl, minCardinality); + URI(owl, onDatatype); + URI(owl, onProperty); + URI(owl, someValuesFrom); + URI(owl, withRestrictions); + URI(rdf, PlainLiteral); + URI(rdf, Property); + URI(rdf, first); + URI(rdf, rest); + URI(rdf, type); + URI(rdfs, Class); + URI(rdfs, Datatype); + URI(rdfs, Literal); + URI(rdfs, Resource); + URI(rdfs, domain); + URI(rdfs, label); + URI(rdfs, range); + URI(rdfs, subClassOf); + URI(xsd, anyURI); + URI(xsd, decimal); + URI(xsd, double); + URI(xsd, maxInclusive); + URI(xsd, minInclusive); + URI(xsd, pattern); + URI(xsd, string); + +#ifndef HAVE_PCRE + fprintf(stderr, "warning: Built without PCRE, datatypes not checked.\n"); +#endif + + const int prop_st = check_properties(model, &uris); + const int inst_st = check_instances(model, &uris); + + printf("Found %d errors among %d files (checked %d restrictions)\n", + n_errors, + argc - 1, + n_restrictions); + + serd_model_free(model); + serd_world_free(world); + return prop_st || inst_st; +} @@ -67,6 +67,19 @@ def configure(conf): defines = ['_POSIX_C_SOURCE=200809L'], mandatory = False) + autowaf.check_pkg(conf, 'libpcre', uselib_store='PCRE', mandatory=False) + if conf.env.HAVE_PCRE: + if conf.check(cflags=['-pthread'], mandatory=False): + conf.env.PTHREAD_CFLAGS = ['-pthread'] + if conf.env.CC_NAME != 'clang': + conf.env.PTHREAD_LINKFLAGS = ['-pthread'] + elif conf.check(linkflags=['-lpthread'], mandatory=False): + conf.env.PTHREAD_CFLAGS = [] + conf.env.PTHREAD_LINKFLAGS = ['-lpthread'] + else: + conf.env.PTHREAD_CFLAGS = [] + conf.env.PTHREAD_LINKFLAGS = [] + dump = Options.options.dump.split(',') if 'all' in dump or 'iter' in dump: conf.define('SERD_DEBUG_ITER', 1) @@ -176,18 +189,27 @@ def build(bld): # Utilities if 
bld.env.BUILD_UTILS: - obj = bld(features = 'c cprogram', - source = 'src/serdi.c', - target = 'serdi', - includes = ['.', './src'], - use = 'libserd', - lib = lib_args['lib'], - install_path = '${BINDIR}') - if not bld.env.BUILD_SHARED or bld.env.STATIC_PROGS: - obj.use = 'libserd_static' - if bld.env.STATIC_PROGS: - obj.env.SHLIB_MARKER = obj.env.STLIB_MARKER - obj.linkflags = ['-static'] + utils = ['serdi'] + if bld.env.HAVE_PCRE: + utils += ['serd_validate'] + + for i in utils: + obj = bld(features = 'c cprogram', + source = 'src/%s.c' % i, + target = i, + includes = ['.', './src'], + use = 'libserd', + lib = lib_args['lib'], + install_path = '${BINDIR}') + if not bld.env.BUILD_SHARED or bld.env.STATIC_PROGS: + obj.use = 'libserd_static' + if bld.env.STATIC_PROGS: + obj.env.SHLIB_MARKER = obj.env.STLIB_MARKER + obj.linkflags = ['-static'] + if i == 'serd_validate': + autowaf.use_lib(bld, obj, 'PCRE') + obj.cflags = bld.env.PTHREAD_CFLAGS + obj.linkflags = bld.env.PTHREAD_LINKFLAGS # Documentation autowaf.build_dox(bld, 'SERD', SERD_VERSION, top, out) |