diff options
author | David Robillard <d@drobilla.net> | 2019-10-14 23:26:41 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2020-10-27 13:13:59 +0100 |
commit | 80fb6d0ff7c093466ac70b38be5676b868516c08 (patch) | |
tree | 9589dad1cae377a3c7e11aa7983106ac9d24afd0 | |
parent | 7f1d50b40814db24573b9eb425566ce1d44d2e85 (diff) | |
download | serd-80fb6d0ff7c093466ac70b38be5676b868516c08.tar.gz serd-80fb6d0ff7c093466ac70b38be5676b868516c08.tar.bz2 serd-80fb6d0ff7c093466ac70b38be5676b868516c08.zip |
Add support for basic literal normalisation
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | doc/serdi.1 | 8 | ||||
-rw-r--r-- | serd/serd.h | 21 | ||||
-rw-r--r-- | src/normalise.c | 273 | ||||
-rw-r--r-- | src/serdi.c | 18 | ||||
-rw-r--r-- | src/string_utils.h | 2 | ||||
-rw-r--r-- | tests/normalise/manifest.ttl | 17 | ||||
-rw-r--r-- | tests/normalise/test-normalise.nt | 69 | ||||
-rw-r--r-- | tests/normalise/test-normalise.ttl | 74 | ||||
-rw-r--r-- | wscript | 5 |
10 files changed, 483 insertions, 5 deletions
@@ -6,6 +6,7 @@ serd (1.0.1) unstable; * Add extensible logging API * Add model for storing statements in memory * Add option for writing terse output without newlines + * Add support for basic literal normalisation * Add support for validation * Add support for writing terse collections * Add support for xsd:float and xsd:double literals diff --git a/doc/serdi.1 b/doc/serdi.1 index 13ee7456..01600700 100644 --- a/doc/serdi.1 +++ b/doc/serdi.1 @@ -65,6 +65,14 @@ This loads the complete input data into memory before writing the output. This will normalize statement order, at the cost of performance and memory consumption. .TP +\fB\-n\fR +Normalise literals. +This normalises supported XSD literals to their canonical form. +Normalisation is conservative in that malformed literals are passed through unmodified. +Supported types: \fIdecimal\fR and all its subtypes, \fIboolean\fR, \fIfloat\fR, and \fIdouble\fR. +Note that this is not a validator, and it will not, for example, guarantee that a \fInonNegativeInteger\fR is actually non-negative. + +.TP .BR \-o " " \fISYNTAX\fR Write output as \fISYNTAX\fR. Valid values (case-insensitive): \*(lqturtle\*(rq, \*(lqntriples\*(rq, \*(lqtrig\*(rq, \*(lqnquads\*(rq, \*(lqempty\*(rq. diff --git a/serd/serd.h b/serd/serd.h index d57dc00c..2574a592 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -658,6 +658,11 @@ SERD_API SerdNode* serd_new_resolved_uri(const char* str, const SerdNode* base); +/// Return a normalised version of `node` if possible, NULL otherwise +SERD_API +SerdNode* +serd_node_normalise(const SerdEnv* env, const SerdNode* node); + /** Resolve `node` against `base` @@ -1213,6 +1218,22 @@ serd_sink_write_end(const SerdSink* sink, const SerdNode* node); /** @} + @name Stream Processing + @{ +*/ + +/** + Return a sink that normalises literal nodes in statements where possible. + + The returned sink acts like `target` in all respects, except literal nodes + in statements may be modified from the original. 
+*/ +SERD_API +SerdSink* +serd_normaliser_new(const SerdSink* target, const SerdEnv* env); + +/** + @} @name Reader @{ */ diff --git a/src/normalise.c b/src/normalise.c new file mode 100644 index 00000000..34f97f71 --- /dev/null +++ b/src/normalise.c @@ -0,0 +1,273 @@ +/* + Copyright 2019-2020 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "namespaces.h" +#include "node.h" +#include "statement.h" +#include "string_utils.h" + +#include "serd/serd.h" + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +typedef struct +{ + const SerdEnv* env; + const SerdSink* target; +} SerdNormaliserData; + +/// Return true iff `c` is "+" or "-" +static inline bool +is_sign(const int c) +{ + return c == '+' || c == '-'; +} + +/// Return true iff `c` is "0" +static inline bool +is_zero(const int c) +{ + return c == '0'; +} + +/// Return true iff `c` is "." 
+static inline bool +is_point(const int c) +{ + return c == '.'; +} + +/// Return a view of `buf` with leading and trailing whitespace trimmed (an empty view if nothing else remains) +static SerdStringView +trim(const char* buf, const size_t len) +{ + SerdStringView view = {buf, len}; + + while (view.len > 0 && is_space(*view.buf)) { + ++view.buf; + --view.len; + } + + while (view.len > 0 && is_space(view.buf[view.len - 1])) { // Length check prevents reading buf[-1] and wrapping len when the view is empty + --view.len; + } + + return view; +} + +/// Scan `s` forwards as long as `pred` is true for the character it points at +static inline const char* +scan(const char** s, bool (*pred)(const int)) +{ + while (pred(**s)) { + ++(*s); + } + + return *s; +} + +/// Skip `s` forward once if `pred` is true for the character it points at +static inline const char** +skip(const char** s, bool (*pred)(const int)) +{ + *s += pred(**s); + return s; +} + +static SerdNode* +serd_normalise_decimal(const char* str) +{ + const char* s = str; // Cursor + const char* sign = scan(&s, is_space); // Sign + const char* first = scan(skip(&s, is_sign), is_zero); // First non-zero + const char* point = scan(&s, is_digit); // Decimal point + const char* last = scan(skip(&s, is_point), is_digit); // Last digit + const char* end = scan(&s, is_space); // Last non-space + + if (*end != '\0') { + return NULL; + } else if (*point == '.') { + while (*(last - 1) == '0') { + --last; + } + } + + char* buf = (char*)calloc(1, (size_t)(end - sign) + 4u); + char* b = buf; + if (*sign == '-') { + *b++ = '-'; + } + + if (*first == '.' 
|| first == last) { + *b++ = '0'; // Add missing leading zero (before point) + } + + memcpy(b, first, (size_t)(last - first)); + b += last - first; + + if (*point != '.') { + *b++ = '.'; + *b++ = '0'; + } else if (point == last - 1) { + *b++ = '0'; // Add missing trailing zero (after point) + } + + const char* const datatype = NS_XSD "decimal"; + SerdNode* node = serd_new_literal( + buf, (size_t)(b - buf), datatype, strlen(datatype), NULL, 0); + + free(buf); + return node; +} + +static SerdNode* +serd_normalise_integer(const char* str, const SerdNode* datatype) +{ + const char* s = str; // Cursor + const char* sign = scan(&s, is_space); // Sign + const char* first = scan(skip(&s, is_sign), is_zero); // First non-zero + const char* last = scan(&s, is_digit); // Last digit + const char* end = scan(&s, is_space); // Last non-space + + if (*end != '\0') { + return NULL; + } + + char* const buf = (char*)calloc(1, (size_t)(end - sign) + 2u); + char* b = buf; + if (*sign == '-') { + *b++ = '-'; + } + + if (first == last) { + *b = '0'; + } else { + memcpy(b, first, (size_t)(last - first)); + } + + SerdNode* node = serd_new_typed_literal(buf, datatype); + + free(buf); + return node; +} + +SerdNode* +serd_node_normalise(const SerdEnv* env, const SerdNode* const node) +{ +#define INTEGER_TYPE_LEN 19 + + static const char int_types[13][INTEGER_TYPE_LEN] = {"byte", + "int", + "integer", + "long", + "negativeInteger", + "nonNegativeInteger", + "nonPositiveInteger", + "positiveInteger", + "short", + "unsignedByte", + "unsignedInt", + "unsignedLong", + "unsignedShort"}; + + const char* str = serd_node_string(node); + SerdNode* datatype = serd_env_expand(env, serd_node_datatype(node)); + if (node->type != SERD_LITERAL || !datatype) { + return NULL; + } + + const char* datatype_uri = serd_node_string(datatype); + SerdNode* result = NULL; + if (!strcmp(datatype_uri, NS_XSD "boolean")) { + const SerdStringView trimmed = trim(str, serd_node_length(node)); + if (trimmed.len) { + if 
(!strncmp(trimmed.buf, "false", trimmed.len) || + !strncmp(trimmed.buf, "0", trimmed.len)) { + result = serd_new_boolean(false); + } else if (!strncmp(trimmed.buf, "true", trimmed.len) || + !strncmp(trimmed.buf, "1", trimmed.len)) { + result = serd_new_boolean(true); + } + } + } else if (!strcmp(datatype_uri, NS_XSD "float")) { + result = serd_new_float((float)serd_strtod(str, NULL)); + } else if (!strcmp(datatype_uri, NS_XSD "double")) { + result = serd_new_double(serd_strtod(str, NULL)); + } else if (!strcmp(datatype_uri, NS_XSD "decimal")) { + result = serd_normalise_decimal(str); + } else if (!strncmp(datatype_uri, NS_XSD, strlen(NS_XSD)) && + bsearch(datatype_uri + strlen(NS_XSD), + &int_types, + sizeof(int_types) / INTEGER_TYPE_LEN, + INTEGER_TYPE_LEN, + (int (*)(const void*, const void*))strcmp)) { + result = serd_normalise_integer(str, datatype); + } + + serd_node_free(datatype); + return result; +} + +static SerdStatus +serd_normaliser_on_statement(SerdNormaliserData* data, + SerdStatementFlags flags, + const SerdStatement* statement) +{ + const SerdNode* object = serd_statement_object(statement); + SerdNode* normo = serd_node_normalise(data->env, object); + + if (normo) { + const SerdStatus st = serd_sink_write(data->target, + flags, + statement->nodes[0], + statement->nodes[1], + normo, + statement->nodes[3]); + + serd_node_free(normo); + return st; + } + + return serd_sink_write_statement(data->target, flags, statement); +} + +static SerdStatus +serd_normaliser_on_event(SerdNormaliserData* data, const SerdEvent* event) +{ + return (event->type == SERD_STATEMENT) + ? 
serd_normaliser_on_statement(data, + event->statement.flags, + event->statement.statement) + : serd_sink_write_event(data->target, event); +} + +SerdSink* +serd_normaliser_new(const SerdSink* target, const SerdEnv* env) +{ + SerdNormaliserData* data = + (SerdNormaliserData*)calloc(1, sizeof(SerdNormaliserData)); + + data->env = env; + data->target = target; + + SerdSink* sink = serd_sink_new(data, free); + + serd_sink_set_event_func(sink, (SerdEventFunc)serd_normaliser_on_event); + + return sink; +} diff --git a/src/serdi.c b/src/serdi.c index c3127e8c..7f4880fd 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -63,6 +63,7 @@ print_usage(const char* name, bool error) fprintf(os, " -k BYTES Parser stack size.\n"); fprintf(os, " -l Lax (non-strict) parsing.\n"); fprintf(os, " -m Build and serialise a model (no streaming).\n"); + fprintf(os, " -n Normalise literals.\n"); fprintf(os, " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"); fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); fprintf(os, " -q Suppress all output except data.\n"); @@ -138,6 +139,7 @@ main(int argc, char** argv) bool osyntax_set = false; bool validate = false; bool use_model = false; + bool normalise = false; bool quiet = false; size_t stack_size = 4194304; const char* input_string = NULL; @@ -170,6 +172,8 @@ main(int argc, char** argv) writer_flags |= SERD_WRITE_LAX; } else if (argv[a][1] == 'm') { use_model = true; + } else if (argv[a][1] == 'n') { + normalise = true; } else if (argv[a][1] == 'q') { quiet = true; } else if (argv[a][1] == 'v') { @@ -271,7 +275,7 @@ main(int argc, char** argv) SerdModel* model = NULL; SerdSink* inserter = NULL; - const SerdSink* sink = NULL; + const SerdSink* out_sink = NULL; if (use_model) { const SerdModelFlags flags = SERD_INDEX_SPO | (input_has_graphs ? 
SERD_INDEX_GRAPHS : 0u) | @@ -280,9 +284,16 @@ main(int argc, char** argv) model = serd_model_new(world, flags); inserter = serd_inserter_new(model, env, NULL); - sink = inserter; + out_sink = inserter; } else { - sink = serd_writer_get_sink(writer); + out_sink = serd_writer_get_sink(writer); + } + + const SerdSink* sink = out_sink; + + SerdSink* normaliser = NULL; + if (normalise) { + sink = normaliser = serd_normaliser_new(out_sink, env); } if (quiet) { @@ -359,6 +370,7 @@ main(int argc, char** argv) serd_range_free(range); } + serd_sink_free(normaliser); serd_node_free(input_name); serd_sink_free(inserter); serd_model_free(model); diff --git a/src/string_utils.h b/src/string_utils.h index 4bd36721..3f3d8c12 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -64,7 +64,7 @@ is_xdigit(const int c) } static inline bool -is_space(const char c) +is_space(const int c) { switch (c) { case ' ': case '\f': case '\n': case '\r': case '\t': case '\v': diff --git a/tests/normalise/manifest.ttl b/tests/normalise/manifest.ttl new file mode 100644 index 00000000..75f08ec9 --- /dev/null +++ b/tests/normalise/manifest.ttl @@ -0,0 +1,17 @@ +@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix rdft: <http://www.w3.org/ns/rdftest#> . + +<> + rdf:type mf:Manifest ; + rdfs:comment "Serd normalisation test cases" ; + mf:entries ( + <#test-normalise> + ) . + +<#test-normalise> + rdf:type rdft:TestTurtleEval ; + mf:name "test-normalise" ; + mf:action <test-normalise.ttl> ; + mf:result <test-normalise.nt> . diff --git a/tests/normalise/test-normalise.nt b/tests/normalise/test-normalise.nt new file mode 100644 index 00000000..e23989ec --- /dev/null +++ b/tests/normalise/test-normalise.nt @@ -0,0 +1,69 @@ +_:b1 <http://example.org/boolean> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> . 
+_:b1 <http://example.org/boolean> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> . +_:b1 <http://example.org/boolean> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> . +_:b1 <http://example.org/boolean> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> . +_:b1 <http://example.org/boolean> " ja "^^<http://www.w3.org/2001/XMLSchema#boolean> . +_:b1 <http://example.org/boolean> ""^^<http://www.w3.org/2001/XMLSchema#boolean> . +_:b1 <http://example.org/ieee754> "1.0E2"^^<http://www.w3.org/2001/XMLSchema#float> . +_:b1 <http://example.org/ieee754> "-1.0E2"^^<http://www.w3.org/2001/XMLSchema#float> . +_:b1 <http://example.org/ieee754> "1.0E3"^^<http://www.w3.org/2001/XMLSchema#double> . +_:b1 <http://example.org/ieee754> "-1.0E3"^^<http://www.w3.org/2001/XMLSchema#double> . +_:b1 <http://example.org/machine> "9223372036854775807"^^<http://www.w3.org/2001/XMLSchema#long> . +_:b1 <http://example.org/machine> "-9223372036854775808"^^<http://www.w3.org/2001/XMLSchema#long> . +_:b1 <http://example.org/machine> "2147483647"^^<http://www.w3.org/2001/XMLSchema#int> . +_:b1 <http://example.org/machine> "-2147483648"^^<http://www.w3.org/2001/XMLSchema#int> . +_:b1 <http://example.org/machine> "32767"^^<http://www.w3.org/2001/XMLSchema#short> . +_:b1 <http://example.org/machine> "-32768"^^<http://www.w3.org/2001/XMLSchema#short> . +_:b1 <http://example.org/machine> "127"^^<http://www.w3.org/2001/XMLSchema#byte> . +_:b1 <http://example.org/machine> "-128"^^<http://www.w3.org/2001/XMLSchema#byte> . +_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedLong> . +_:b1 <http://example.org/machine> "18446744073709551615"^^<http://www.w3.org/2001/XMLSchema#unsignedLong> . +_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedInt> . +_:b1 <http://example.org/machine> "4294967295"^^<http://www.w3.org/2001/XMLSchema#unsignedInt> . +_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedShort> . 
+_:b1 <http://example.org/machine> "65535"^^<http://www.w3.org/2001/XMLSchema#unsignedShort> . +_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedByte> . +_:b1 <http://example.org/machine> "255"^^<http://www.w3.org/2001/XMLSchema#unsignedByte> . +_:b1 <http://example.org/decimal> "0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-36893488147419103232.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . 
+_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> " junk 1234.5678 "^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> " 1234.5678 junk "^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> " junk 987654321 "^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> " 987654321 junk "^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "0"^^<http://www.w3.org/2001/XMLSchema#nonPositiveInteger> . +_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#nonPositiveInteger> . +_:b1 <http://example.org/integer> "-1"^^<http://www.w3.org/2001/XMLSchema#negativeInteger> . +_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#negativeInteger> . +_:b1 <http://example.org/integer> "0"^^<http://www.w3.org/2001/XMLSchema#nonNegativeInteger> . 
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#nonNegativeInteger> . +_:b1 <http://example.org/integer> "1"^^<http://www.w3.org/2001/XMLSchema#positiveInteger> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#positiveInteger> . +_:b1 <http://example.org/other> "untyped" . +_:b1 <http://example.org/other> <http://example.org/uri> . +_:b1 <http://example.org/other> "notxsd"^^<http://example.org/sometype> . +_:b1 <http://example.org/other> "unsupported"^^<http://www.w3.org/2001/XMLSchema#name> . diff --git a/tests/normalise/test-normalise.ttl b/tests/normalise/test-normalise.ttl new file mode 100644 index 00000000..3db64f39 --- /dev/null +++ b/tests/normalise/test-normalise.ttl @@ -0,0 +1,74 @@ +@base <http://example.org/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +[ + <boolean> " false "^^xsd:boolean , + " 0 "^^xsd:boolean , + " true "^^xsd:boolean , + " 1 "^^xsd:boolean , + " ja "^^xsd:boolean , + ""^^xsd:boolean ; + <ieee754> " +0100.0 "^^xsd:float , + " -0100.0 "^^xsd:float , + " +01000.0 "^^xsd:double , + " -01000.0 "^^xsd:double ; + <machine> " +09223372036854775807 "^^xsd:long , + " -09223372036854775808 "^^xsd:long , + " +02147483647 "^^xsd:int , + " -02147483648 "^^xsd:int , + " +032767 "^^xsd:short , + " -032768 "^^xsd:short , + " +0127 "^^xsd:byte , + " -0128 "^^xsd:byte , + " +01 "^^xsd:unsignedLong , + " 018446744073709551615 "^^xsd:unsignedLong , + " +01 "^^xsd:unsignedInt , + " 04294967295 "^^xsd:unsignedInt , + " +01 "^^xsd:unsignedShort , + " 065535 "^^xsd:unsignedShort , + " +01 "^^xsd:unsignedByte , + " 0255 "^^xsd:unsignedByte ; + <decimal> " 00 "^^xsd:decimal , + " +0 "^^xsd:decimal , + " -0 "^^xsd:decimal , + " 36893488147419103232 "^^xsd:decimal , + " 0036893488147419103232 "^^xsd:decimal , + " +36893488147419103232 "^^xsd:decimal , + " +0036893488147419103232 "^^xsd:decimal , + " +0036893488147419103232. 
"^^xsd:decimal , + " +0036893488147419103232.00 "^^xsd:decimal , + " +0036893488147419103232.12300 "^^xsd:decimal , + " -36893488147419103232 "^^xsd:decimal , + " -0036893488147419103232 "^^xsd:decimal , + " -0036893488147419103232. "^^xsd:decimal , + " -0036893488147419103232.00 "^^xsd:decimal , + " -0036893488147419103232.12300 "^^xsd:decimal , + " 00.12300 "^^xsd:decimal , + " .12300 "^^xsd:decimal , + " +.12300 "^^xsd:decimal , + " +00.12300 "^^xsd:decimal , + " -.12300 "^^xsd:decimal , + " -00.12300 "^^xsd:decimal , + " junk 1234.5678 "^^xsd:decimal , + " 1234.5678 junk "^^xsd:decimal ; + <integer> " 36893488147419103232 "^^xsd:integer , + " 0036893488147419103232 "^^xsd:integer , + " +36893488147419103232 "^^xsd:integer , + " +0036893488147419103232 "^^xsd:integer , + " -36893488147419103232 "^^xsd:integer , + " -0036893488147419103232 "^^xsd:integer , + " junk 987654321 "^^xsd:integer , + " 987654321 junk "^^xsd:integer , + " 00 "^^xsd:nonPositiveInteger , + " -036893488147419103232 "^^xsd:nonPositiveInteger , + " -01 "^^xsd:negativeInteger , + " -036893488147419103232 "^^xsd:negativeInteger , + " 00 "^^xsd:nonNegativeInteger , + " 036893488147419103232 "^^xsd:nonNegativeInteger , + " +01 "^^xsd:positiveInteger , + " 036893488147419103232 "^^xsd:positiveInteger ; + <other> "untyped" , + <uri> , + "notxsd"^^<sometype> , + "unsupported"^^xsd:name +] . 
@@ -179,6 +179,7 @@ lib_source = ['src/base64.c', 'src/n3.c', 'src/node.c', 'src/nodes.c', + 'src/normalise.c', 'src/range.c', 'src/reader.c', 'src/sink.c', @@ -678,7 +679,7 @@ def test(tst): import tempfile # Create test output directories - for i in ['bad', 'good', 'lax', 'terse', 'multifile', + for i in ['bad', 'good', 'lax', 'normalise', 'terse', 'multifile', 'TurtleTests', 'NTriplesTests', 'NQuadsTests', 'TriGTests']: try: test_dir = os.path.join('tests', i) @@ -806,6 +807,8 @@ def test(tst): test_suite(tst, serd_base + 'bad/', 'bad', None, 'Turtle') test_suite(tst, serd_base + 'lax/', 'lax', None, 'Turtle', ['-l']) test_suite(tst, serd_base + 'lax/', 'lax', None, 'Turtle') + test_suite(tst, serd_base + 'normalise/', 'normalise', None, 'Turtle', + ['-n']) test_suite(tst, serd_base + 'terse/', 'terse', None, 'Turtle', ['-t'], output_syntax='Turtle') |