aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2019-10-14 23:26:41 +0200
committerDavid Robillard <d@drobilla.net>2020-10-27 13:13:59 +0100
commit80fb6d0ff7c093466ac70b38be5676b868516c08 (patch)
tree9589dad1cae377a3c7e11aa7983106ac9d24afd0
parent7f1d50b40814db24573b9eb425566ce1d44d2e85 (diff)
downloadserd-80fb6d0ff7c093466ac70b38be5676b868516c08.tar.gz
serd-80fb6d0ff7c093466ac70b38be5676b868516c08.tar.bz2
serd-80fb6d0ff7c093466ac70b38be5676b868516c08.zip
Add support for basic literal normalisation
-rw-r--r--NEWS1
-rw-r--r--doc/serdi.18
-rw-r--r--serd/serd.h21
-rw-r--r--src/normalise.c273
-rw-r--r--src/serdi.c18
-rw-r--r--src/string_utils.h2
-rw-r--r--tests/normalise/manifest.ttl17
-rw-r--r--tests/normalise/test-normalise.nt69
-rw-r--r--tests/normalise/test-normalise.ttl74
-rw-r--r--wscript5
10 files changed, 483 insertions, 5 deletions
diff --git a/NEWS b/NEWS
index ae4f2e49..ee2f04e1 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,7 @@ serd (1.0.1) unstable;
* Add extensible logging API
* Add model for storing statements in memory
* Add option for writing terse output without newlines
+ * Add support for basic literal normalisation
* Add support for validation
* Add support for writing terse collections
* Add support for xsd:float and xsd:double literals
diff --git a/doc/serdi.1 b/doc/serdi.1
index 13ee7456..01600700 100644
--- a/doc/serdi.1
+++ b/doc/serdi.1
@@ -65,6 +65,14 @@ This loads the complete input data into memory before writing the output.
This will normalize statement order, at the cost of performance and memory consumption.
.TP
+\fB\-n\fR
+Normalise literals.
+This normalises supported XSD literals to their canonical form.
+Normalisation is conservative in that malformed literals are passed through unmodified.
+Supported types: \fIdecimal\fR and all its subtypes, \fIboolean\fR, \fIfloat\fR, and \fIdouble\fR.
+Note that this is not a validator, and it will not, for example, guarantee that a \fInonNegativeInteger\fR is actually non-negative.
+
+.TP
.BR \-o " " \fISYNTAX\fR
Write output as \fISYNTAX\fR.
Valid values (case-insensitive): \*(lqturtle\*(rq, \*(lqntriples\*(rq, \*(lqtrig\*(rq, \*(lqnquads\*(rq, \*(lqempty\*(rq.
diff --git a/serd/serd.h b/serd/serd.h
index d57dc00c..2574a592 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -658,6 +658,11 @@ SERD_API
SerdNode*
serd_new_resolved_uri(const char* str, const SerdNode* base);
+/// Return a normalised version of `node` if possible, NULL otherwise
+SERD_API
+SerdNode*
+serd_node_normalise(const SerdEnv* env, const SerdNode* node);
+
/**
Resolve `node` against `base`
@@ -1213,6 +1218,22 @@ serd_sink_write_end(const SerdSink* sink, const SerdNode* node);
/**
@}
+ @name Stream Processing
+ @{
+*/
+
+/**
+ Return a sink that normalises literal nodes in statements where possible.
+
+ The returned sink acts like `target` in all respects, except literal nodes
+ in statements may be modified from the original.
+*/
+SERD_API
+SerdSink*
+serd_normaliser_new(const SerdSink* target, const SerdEnv* env);
+
+/**
+ @}
@name Reader
@{
*/
diff --git a/src/normalise.c b/src/normalise.c
new file mode 100644
index 00000000..34f97f71
--- /dev/null
+++ b/src/normalise.c
@@ -0,0 +1,273 @@
+/*
+ Copyright 2019-2020 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "namespaces.h"
+#include "node.h"
+#include "statement.h"
+#include "string_utils.h"
+
+#include "serd/serd.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+/// Data attached to a normalising sink created by serd_normaliser_new()
+typedef struct
+{
+ const SerdEnv* env; ///< Environment passed to serd_node_normalise()
+ const SerdSink* target; ///< Sink that all events are forwarded to
+} SerdNormaliserData;
+
+/// Test whether `c` is a sign character, that is, either "+" or "-"
+static inline bool
+is_sign(const int c)
+{
+  switch (c) {
+  case '+':
+  case '-':
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Test whether `c` is the digit character "0"
+static inline bool
+is_zero(const int c)
+{
+  const bool matches = ('0' == c);
+
+  return matches;
+}
+
+/// Test whether `c` is the decimal point character "."
+static inline bool
+is_point(const int c)
+{
+  const bool matches = ('.' == c);
+
+  return matches;
+}
+
+/**
+   Return a view of `buf` with leading and trailing whitespace trimmed.
+
+   The returned view points into `buf`, nothing is copied or modified.  If
+   `buf` is empty or contains only whitespace, the returned view has length
+   zero.
+
+   @param buf Start of the string to trim.
+   @param len Number of characters in `buf` to consider.
+*/
+static SerdStringView
+trim(const char* buf, const size_t len)
+{
+  SerdStringView view = {buf, len};
+
+  // Advance the start past any leading whitespace
+  while (view.len > 0 && is_space(*view.buf)) {
+    ++view.buf;
+    --view.len;
+  }
+
+  // Pull the end back over any trailing whitespace.  The length must be
+  // checked here as well: for an empty or all-whitespace string it is zero at
+  // this point, and `view.len - 1` would wrap around and read out of bounds.
+  while (view.len > 0 && is_space(view.buf[view.len - 1])) {
+    --view.len;
+  }
+
+  return view;
+}
+
+/**
+   Advance the cursor `*s` past every leading character that satisfies `pred`.
+
+   @return The new cursor position, which is also stored in `*s`.
+*/
+static inline const char*
+scan(const char** s, bool (*pred)(const int))
+{
+  const char* cursor = *s;
+
+  for (; pred(*cursor); ++cursor) {
+  }
+
+  *s = cursor;
+  return cursor;
+}
+
+/**
+   Advance the cursor `*s` by one character if that character satisfies `pred`.
+
+   @return `s` itself, so the call can be nested inside a call to scan().
+*/
+static inline const char**
+skip(const char** s, bool (*pred)(const int))
+{
+  if (pred(**s)) {
+    ++(*s);
+  }
+
+  return s;
+}
+
+/**
+   Return a normalised xsd:decimal literal for `str`, or NULL.
+
+   Surrounding whitespace, a leading "+", redundant leading zeros, and
+   redundant trailing zeros after the point are dropped, and a digit is
+   ensured on both sides of the point.  A leading "-" is always kept.
+
+   NULL is returned if `str` contains no digits at all, if anything other than
+   whitespace follows the number, or if allocation fails.
+*/
+static SerdNode*
+serd_normalise_decimal(const char* str)
+{
+  const char* s      = str;                 // Cursor
+  const char* sign   = scan(&s, is_space);  // Sign (or first digit)
+  const char* digits = *skip(&s, is_sign);  // First digit (or point)
+  const char* first  = scan(&s, is_zero);   // First non-zero
+  const char* point  = scan(&s, is_digit);  // Decimal point
+  const char* frac   = *skip(&s, is_point); // First fraction digit
+  const char* last   = scan(&s, is_digit);  // One past the last digit
+  const char* end    = scan(&s, is_space);  // One past trailing whitespace
+
+  if (*end != '\0') {
+    return NULL; // Something other than a number and whitespace
+  }
+
+  if (point == digits && last == frac) {
+    return NULL; // No digits on either side of the point, like "" or "-."
+  }
+
+  if (*point == '.') {
+    // Drop trailing zeros, this stops at the point at the latest
+    while (*(last - 1) == '0') {
+      --last;
+    }
+  }
+
+  // Room for the input plus an added leading "0", trailing ".0", and null
+  char* buf = (char*)calloc(1, (size_t)(end - sign) + 4u);
+  if (!buf) {
+    return NULL;
+  }
+
+  char* b = buf;
+  if (*sign == '-') {
+    *b++ = '-';
+  }
+
+  if (*first == '.' || first == last) {
+    *b++ = '0'; // Add missing leading zero (before point)
+  }
+
+  memcpy(b, first, (size_t)(last - first));
+  b += last - first;
+
+  if (*point != '.') {
+    *b++ = '.'; // Add missing point and fraction
+    *b++ = '0';
+  } else if (point == last - 1) {
+    *b++ = '0'; // Add missing trailing zero (after point)
+  }
+
+  const char* const datatype = NS_XSD "decimal";
+  SerdNode*         node     = serd_new_literal(
+    buf, (size_t)(b - buf), datatype, strlen(datatype), NULL, 0);
+
+  free(buf);
+  return node;
+}
+
+/**
+   Return a normalised integer literal for `str` with `datatype`, or NULL.
+
+   Surrounding whitespace, a leading "+", and redundant leading zeros are
+   dropped.  A leading "-" is always kept, even if the value is zero.
+
+   NULL is returned if `str` contains no digits at all, if anything other than
+   whitespace follows the number, or if allocation fails.
+*/
+static SerdNode*
+serd_normalise_integer(const char* str, const SerdNode* datatype)
+{
+  const char* s      = str;                // Cursor
+  const char* sign   = scan(&s, is_space); // Sign (or first digit)
+  const char* digits = *skip(&s, is_sign); // First digit
+  const char* first  = scan(&s, is_zero);  // First non-zero
+  const char* last   = scan(&s, is_digit); // One past the last digit
+  const char* end    = scan(&s, is_space); // One past trailing whitespace
+
+  if (*end != '\0') {
+    return NULL; // Something other than a number and whitespace
+  }
+
+  if (last == digits) {
+    return NULL; // No digits at all, like "" or "-"
+  }
+
+  // Zero-filled, so the string written below is always null terminated
+  char* const buf = (char*)calloc(1, (size_t)(end - sign) + 2u);
+  if (!buf) {
+    return NULL;
+  }
+
+  char* b = buf;
+  if (*sign == '-') {
+    *b++ = '-';
+  }
+
+  if (first == last) {
+    *b = '0'; // All digits were zeros, keep a single one
+  } else {
+    memcpy(b, first, (size_t)(last - first));
+  }
+
+  SerdNode* node = serd_new_typed_literal(buf, datatype);
+
+  free(buf);
+  return node;
+}
+
+/// Compare two null-terminated names with strcmp(), for use with bsearch()
+static int
+compare_type_names(const void* a, const void* b)
+{
+  return strcmp((const char*)a, (const char*)b);
+}
+
+/// Return true iff the characters in `view` are exactly the string `str`
+static bool
+view_equals(const SerdStringView view, const char* const str)
+{
+  return view.len == strlen(str) && !memcmp(view.buf, str, view.len);
+}
+
+/**
+   Return a new normalised version of the literal `node`, or NULL.
+
+   The datatype of `node` is expanded with `env`, then the literal is handled
+   according to that type: xsd:boolean, xsd:float, xsd:double, xsd:decimal,
+   and the XSD integer types listed below.  NULL is returned if `node` is not
+   a literal, has no datatype that can be expanded, has an unsupported
+   datatype, or has a value that the handler for its type rejects.
+*/
+SerdNode*
+serd_node_normalise(const SerdEnv* env, const SerdNode* const node)
+{
+  enum { INTEGER_TYPE_LEN = 19 }; // Longest name below, plus terminator
+
+  // Local names of XSD integer types, sorted with strcmp() for bsearch()
+  static const char int_types[13][INTEGER_TYPE_LEN] = {"byte",
+                                                       "int",
+                                                       "integer",
+                                                       "long",
+                                                       "negativeInteger",
+                                                       "nonNegativeInteger",
+                                                       "nonPositiveInteger",
+                                                       "positiveInteger",
+                                                       "short",
+                                                       "unsignedByte",
+                                                       "unsignedInt",
+                                                       "unsignedLong",
+                                                       "unsignedShort"};
+
+  // Check the node type first so nothing is allocated for non-literals
+  if (node->type != SERD_LITERAL) {
+    return NULL;
+  }
+
+  SerdNode* datatype = serd_env_expand(env, serd_node_datatype(node));
+  if (!datatype) {
+    return NULL;
+  }
+
+  const char* str          = serd_node_string(node);
+  const char* datatype_uri = serd_node_string(datatype);
+  SerdNode*   result       = NULL;
+  if (!strcmp(datatype_uri, NS_XSD "boolean")) {
+    // Compare the whole trimmed string, a prefix like "f" is not a boolean
+    const SerdStringView trimmed = trim(str, serd_node_length(node));
+    if (view_equals(trimmed, "false") || view_equals(trimmed, "0")) {
+      result = serd_new_boolean(false);
+    } else if (view_equals(trimmed, "true") || view_equals(trimmed, "1")) {
+      result = serd_new_boolean(true);
+    }
+  } else if (!strcmp(datatype_uri, NS_XSD "float")) {
+    // NOTE(review): no end pointer is requested from serd_strtod() here, so
+    // this branch can not tell whether all of `str` was a number -- confirm
+    result = serd_new_float((float)serd_strtod(str, NULL));
+  } else if (!strcmp(datatype_uri, NS_XSD "double")) {
+    result = serd_new_double(serd_strtod(str, NULL));
+  } else if (!strcmp(datatype_uri, NS_XSD "decimal")) {
+    result = serd_normalise_decimal(str);
+  } else if (!strncmp(datatype_uri, NS_XSD, strlen(NS_XSD)) &&
+             bsearch(datatype_uri + strlen(NS_XSD),
+                     int_types,
+                     sizeof(int_types) / sizeof(int_types[0]),
+                     sizeof(int_types[0]),
+                     compare_type_names)) {
+    result = serd_normalise_integer(str, datatype);
+  }
+
+  serd_node_free(datatype);
+  return result;
+}
+
+/**
+   Forward `statement` to the target sink, normalising its object if possible.
+
+   If serd_node_normalise() returns a node for the object, a statement with
+   the same other nodes and that node as the object is written instead, and
+   the node is freed afterwards.  Otherwise `statement` is written unchanged.
+*/
+static SerdStatus
+serd_normaliser_on_statement(SerdNormaliserData*  data,
+                             SerdStatementFlags   flags,
+                             const SerdStatement* statement)
+{
+  SerdNode* const normalised =
+    serd_node_normalise(data->env, serd_statement_object(statement));
+
+  if (!normalised) {
+    return serd_sink_write_statement(data->target, flags, statement);
+  }
+
+  const SerdStatus status = serd_sink_write(data->target,
+                                            flags,
+                                            statement->nodes[0],
+                                            statement->nodes[1],
+                                            normalised,
+                                            statement->nodes[3]);
+
+  serd_node_free(normalised);
+  return status;
+}
+
+/// Handle statement events specially, and pass all others on to the target
+static SerdStatus
+serd_normaliser_on_event(SerdNormaliserData* data, const SerdEvent* event)
+{
+  if (event->type != SERD_STATEMENT) {
+    return serd_sink_write_event(data->target, event);
+  }
+
+  return serd_normaliser_on_statement(data,
+                                      event->statement.flags,
+                                      event->statement.statement);
+}
+
+/**
+   Return a new sink that normalises statement objects on the way to `target`.
+
+   The sink keeps pointers to `target` and `env`, it does not copy them.
+   Returns NULL if allocation fails.
+*/
+SerdSink*
+serd_normaliser_new(const SerdSink* target, const SerdEnv* env)
+{
+  SerdNormaliserData* data =
+    (SerdNormaliserData*)calloc(1, sizeof(SerdNormaliserData));
+
+  if (!data) {
+    return NULL;
+  }
+
+  data->env    = env;
+  data->target = target;
+
+  // free() is passed along with the data, presumably to destroy it with the
+  // sink, so the data only needs to be freed here if there is no sink
+  SerdSink* sink = serd_sink_new(data, free);
+  if (!sink) {
+    free(data);
+    return NULL;
+  }
+
+  serd_sink_set_event_func(sink, (SerdEventFunc)serd_normaliser_on_event);
+
+  return sink;
+}
diff --git a/src/serdi.c b/src/serdi.c
index c3127e8c..7f4880fd 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -63,6 +63,7 @@ print_usage(const char* name, bool error)
fprintf(os, " -k BYTES Parser stack size.\n");
fprintf(os, " -l Lax (non-strict) parsing.\n");
fprintf(os, " -m Build and serialise a model (no streaming).\n");
+ fprintf(os, " -n Normalise literals.\n");
fprintf(os, " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n");
fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n");
fprintf(os, " -q Suppress all output except data.\n");
@@ -138,6 +139,7 @@ main(int argc, char** argv)
bool osyntax_set = false;
bool validate = false;
bool use_model = false;
+ bool normalise = false;
bool quiet = false;
size_t stack_size = 4194304;
const char* input_string = NULL;
@@ -170,6 +172,8 @@ main(int argc, char** argv)
writer_flags |= SERD_WRITE_LAX;
} else if (argv[a][1] == 'm') {
use_model = true;
+ } else if (argv[a][1] == 'n') {
+ normalise = true;
} else if (argv[a][1] == 'q') {
quiet = true;
} else if (argv[a][1] == 'v') {
@@ -271,7 +275,7 @@ main(int argc, char** argv)
SerdModel* model = NULL;
SerdSink* inserter = NULL;
- const SerdSink* sink = NULL;
+ const SerdSink* out_sink = NULL;
if (use_model) {
const SerdModelFlags flags =
SERD_INDEX_SPO | (input_has_graphs ? SERD_INDEX_GRAPHS : 0u) |
@@ -280,9 +284,16 @@ main(int argc, char** argv)
model = serd_model_new(world, flags);
inserter = serd_inserter_new(model, env, NULL);
- sink = inserter;
+ out_sink = inserter;
} else {
- sink = serd_writer_get_sink(writer);
+ out_sink = serd_writer_get_sink(writer);
+ }
+
+ const SerdSink* sink = out_sink;
+
+ SerdSink* normaliser = NULL;
+ if (normalise) {
+ sink = normaliser = serd_normaliser_new(out_sink, env);
}
if (quiet) {
@@ -359,6 +370,7 @@ main(int argc, char** argv)
serd_range_free(range);
}
+ serd_sink_free(normaliser);
serd_node_free(input_name);
serd_sink_free(inserter);
serd_model_free(model);
diff --git a/src/string_utils.h b/src/string_utils.h
index 4bd36721..3f3d8c12 100644
--- a/src/string_utils.h
+++ b/src/string_utils.h
@@ -64,7 +64,7 @@ is_xdigit(const int c)
}
static inline bool
-is_space(const char c)
+is_space(const int c)
{
switch (c) {
case ' ': case '\f': case '\n': case '\r': case '\t': case '\v':
diff --git a/tests/normalise/manifest.ttl b/tests/normalise/manifest.ttl
new file mode 100644
index 00000000..75f08ec9
--- /dev/null
+++ b/tests/normalise/manifest.ttl
@@ -0,0 +1,17 @@
+@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix rdft: <http://www.w3.org/ns/rdftest#> .
+
+<>
+ rdf:type mf:Manifest ;
+ rdfs:comment "Serd normalisation test cases" ;
+ mf:entries (
+ <#test-normalise>
+ ) .
+
+<#test-normalise>
+ rdf:type rdft:TestTurtleEval ;
+ mf:name "test-normalise" ;
+ mf:action <test-normalise.ttl> ;
+ mf:result <test-normalise.nt> .
diff --git a/tests/normalise/test-normalise.nt b/tests/normalise/test-normalise.nt
new file mode 100644
index 00000000..e23989ec
--- /dev/null
+++ b/tests/normalise/test-normalise.nt
@@ -0,0 +1,69 @@
+_:b1 <http://example.org/boolean> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> .
+_:b1 <http://example.org/boolean> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> .
+_:b1 <http://example.org/boolean> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> .
+_:b1 <http://example.org/boolean> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> .
+_:b1 <http://example.org/boolean> " ja "^^<http://www.w3.org/2001/XMLSchema#boolean> .
+_:b1 <http://example.org/boolean> ""^^<http://www.w3.org/2001/XMLSchema#boolean> .
+_:b1 <http://example.org/ieee754> "1.0E2"^^<http://www.w3.org/2001/XMLSchema#float> .
+_:b1 <http://example.org/ieee754> "-1.0E2"^^<http://www.w3.org/2001/XMLSchema#float> .
+_:b1 <http://example.org/ieee754> "1.0E3"^^<http://www.w3.org/2001/XMLSchema#double> .
+_:b1 <http://example.org/ieee754> "-1.0E3"^^<http://www.w3.org/2001/XMLSchema#double> .
+_:b1 <http://example.org/machine> "9223372036854775807"^^<http://www.w3.org/2001/XMLSchema#long> .
+_:b1 <http://example.org/machine> "-9223372036854775808"^^<http://www.w3.org/2001/XMLSchema#long> .
+_:b1 <http://example.org/machine> "2147483647"^^<http://www.w3.org/2001/XMLSchema#int> .
+_:b1 <http://example.org/machine> "-2147483648"^^<http://www.w3.org/2001/XMLSchema#int> .
+_:b1 <http://example.org/machine> "32767"^^<http://www.w3.org/2001/XMLSchema#short> .
+_:b1 <http://example.org/machine> "-32768"^^<http://www.w3.org/2001/XMLSchema#short> .
+_:b1 <http://example.org/machine> "127"^^<http://www.w3.org/2001/XMLSchema#byte> .
+_:b1 <http://example.org/machine> "-128"^^<http://www.w3.org/2001/XMLSchema#byte> .
+_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedLong> .
+_:b1 <http://example.org/machine> "18446744073709551615"^^<http://www.w3.org/2001/XMLSchema#unsignedLong> .
+_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedInt> .
+_:b1 <http://example.org/machine> "4294967295"^^<http://www.w3.org/2001/XMLSchema#unsignedInt> .
+_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedShort> .
+_:b1 <http://example.org/machine> "65535"^^<http://www.w3.org/2001/XMLSchema#unsignedShort> .
+_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedByte> .
+_:b1 <http://example.org/machine> "255"^^<http://www.w3.org/2001/XMLSchema#unsignedByte> .
+_:b1 <http://example.org/decimal> "0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> " junk 1234.5678 "^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> " 1234.5678 junk "^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> " junk 987654321 "^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> " 987654321 junk "^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "0"^^<http://www.w3.org/2001/XMLSchema#nonPositiveInteger> .
+_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#nonPositiveInteger> .
+_:b1 <http://example.org/integer> "-1"^^<http://www.w3.org/2001/XMLSchema#negativeInteger> .
+_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#negativeInteger> .
+_:b1 <http://example.org/integer> "0"^^<http://www.w3.org/2001/XMLSchema#nonNegativeInteger> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#nonNegativeInteger> .
+_:b1 <http://example.org/integer> "1"^^<http://www.w3.org/2001/XMLSchema#positiveInteger> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#positiveInteger> .
+_:b1 <http://example.org/other> "untyped" .
+_:b1 <http://example.org/other> <http://example.org/uri> .
+_:b1 <http://example.org/other> "notxsd"^^<http://example.org/sometype> .
+_:b1 <http://example.org/other> "unsupported"^^<http://www.w3.org/2001/XMLSchema#name> .
diff --git a/tests/normalise/test-normalise.ttl b/tests/normalise/test-normalise.ttl
new file mode 100644
index 00000000..3db64f39
--- /dev/null
+++ b/tests/normalise/test-normalise.ttl
@@ -0,0 +1,74 @@
+@base <http://example.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[
+ <boolean> " false "^^xsd:boolean ,
+ " 0 "^^xsd:boolean ,
+ " true "^^xsd:boolean ,
+ " 1 "^^xsd:boolean ,
+ " ja "^^xsd:boolean ,
+ ""^^xsd:boolean ;
+ <ieee754> " +0100.0 "^^xsd:float ,
+ " -0100.0 "^^xsd:float ,
+ " +01000.0 "^^xsd:double ,
+ " -01000.0 "^^xsd:double ;
+ <machine> " +09223372036854775807 "^^xsd:long ,
+ " -09223372036854775808 "^^xsd:long ,
+ " +02147483647 "^^xsd:int ,
+ " -02147483648 "^^xsd:int ,
+ " +032767 "^^xsd:short ,
+ " -032768 "^^xsd:short ,
+ " +0127 "^^xsd:byte ,
+ " -0128 "^^xsd:byte ,
+ " +01 "^^xsd:unsignedLong ,
+ " 018446744073709551615 "^^xsd:unsignedLong ,
+ " +01 "^^xsd:unsignedInt ,
+ " 04294967295 "^^xsd:unsignedInt ,
+ " +01 "^^xsd:unsignedShort ,
+ " 065535 "^^xsd:unsignedShort ,
+ " +01 "^^xsd:unsignedByte ,
+ " 0255 "^^xsd:unsignedByte ;
+ <decimal> " 00 "^^xsd:decimal ,
+ " +0 "^^xsd:decimal ,
+ " -0 "^^xsd:decimal ,
+ " 36893488147419103232 "^^xsd:decimal ,
+ " 0036893488147419103232 "^^xsd:decimal ,
+ " +36893488147419103232 "^^xsd:decimal ,
+ " +0036893488147419103232 "^^xsd:decimal ,
+ " +0036893488147419103232. "^^xsd:decimal ,
+ " +0036893488147419103232.00 "^^xsd:decimal ,
+ " +0036893488147419103232.12300 "^^xsd:decimal ,
+ " -36893488147419103232 "^^xsd:decimal ,
+ " -0036893488147419103232 "^^xsd:decimal ,
+ " -0036893488147419103232. "^^xsd:decimal ,
+ " -0036893488147419103232.00 "^^xsd:decimal ,
+ " -0036893488147419103232.12300 "^^xsd:decimal ,
+ " 00.12300 "^^xsd:decimal ,
+ " .12300 "^^xsd:decimal ,
+ " +.12300 "^^xsd:decimal ,
+ " +00.12300 "^^xsd:decimal ,
+ " -.12300 "^^xsd:decimal ,
+ " -00.12300 "^^xsd:decimal ,
+ " junk 1234.5678 "^^xsd:decimal ,
+ " 1234.5678 junk "^^xsd:decimal ;
+ <integer> " 36893488147419103232 "^^xsd:integer ,
+ " 0036893488147419103232 "^^xsd:integer ,
+ " +36893488147419103232 "^^xsd:integer ,
+ " +0036893488147419103232 "^^xsd:integer ,
+ " -36893488147419103232 "^^xsd:integer ,
+ " -0036893488147419103232 "^^xsd:integer ,
+ " junk 987654321 "^^xsd:integer ,
+ " 987654321 junk "^^xsd:integer ,
+ " 00 "^^xsd:nonPositiveInteger ,
+ " -036893488147419103232 "^^xsd:nonPositiveInteger ,
+ " -01 "^^xsd:negativeInteger ,
+ " -036893488147419103232 "^^xsd:negativeInteger ,
+ " 00 "^^xsd:nonNegativeInteger ,
+ " 036893488147419103232 "^^xsd:nonNegativeInteger ,
+ " +01 "^^xsd:positiveInteger ,
+ " 036893488147419103232 "^^xsd:positiveInteger ;
+ <other> "untyped" ,
+ <uri> ,
+ "notxsd"^^<sometype> ,
+ "unsupported"^^xsd:name
+] .
diff --git a/wscript b/wscript
index 2bcdb7f4..404e0d4c 100644
--- a/wscript
+++ b/wscript
@@ -179,6 +179,7 @@ lib_source = ['src/base64.c',
'src/n3.c',
'src/node.c',
'src/nodes.c',
+ 'src/normalise.c',
'src/range.c',
'src/reader.c',
'src/sink.c',
@@ -678,7 +679,7 @@ def test(tst):
import tempfile
# Create test output directories
- for i in ['bad', 'good', 'lax', 'terse', 'multifile',
+ for i in ['bad', 'good', 'lax', 'normalise', 'terse', 'multifile',
'TurtleTests', 'NTriplesTests', 'NQuadsTests', 'TriGTests']:
try:
test_dir = os.path.join('tests', i)
@@ -806,6 +807,8 @@ def test(tst):
test_suite(tst, serd_base + 'bad/', 'bad', None, 'Turtle')
test_suite(tst, serd_base + 'lax/', 'lax', None, 'Turtle', ['-l'])
test_suite(tst, serd_base + 'lax/', 'lax', None, 'Turtle')
+ test_suite(tst, serd_base + 'normalise/', 'normalise', None, 'Turtle',
+ ['-n'])
test_suite(tst, serd_base + 'terse/', 'terse', None, 'Turtle', ['-t'],
output_syntax='Turtle')