aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2019-10-14 23:26:41 +0200
committerDavid Robillard <d@drobilla.net>2021-03-08 23:36:08 -0500
commit1fd33e0a85bdf6bcc4f8138940462c4a4a391175 (patch)
treefab500d072049701370f45c5ae2af76683d8fd1f
parent7efaf2431b9c1a96f6ed2e28626aff4886efc749 (diff)
downloadserd-1fd33e0a85bdf6bcc4f8138940462c4a4a391175.tar.gz
serd-1fd33e0a85bdf6bcc4f8138940462c4a4a391175.tar.bz2
serd-1fd33e0a85bdf6bcc4f8138940462c4a4a391175.zip
Add support for converting literals to canonical form
-rw-r--r--NEWS1
-rw-r--r--doc/serdi.119
-rw-r--r--include/serd/serd.h26
-rw-r--r--meson.build1
-rw-r--r--src/canon.c194
-rw-r--r--src/node.c2
-rw-r--r--src/node.h4
-rw-r--r--src/serdi.c20
-rw-r--r--src/string_utils.h2
-rw-r--r--test/meson.build7
-rw-r--r--test/normalise/bad-boolean.ttl5
-rw-r--r--test/normalise/bad-decimal-leading.ttl4
-rw-r--r--test/normalise/bad-decimal-trailing.ttl4
-rw-r--r--test/normalise/bad-empty-boolean.ttl5
-rw-r--r--test/normalise/bad-integer-leading.ttl4
-rw-r--r--test/normalise/bad-integer-trailing.ttl4
-rw-r--r--test/normalise/manifest.ttl60
-rw-r--r--test/normalise/test-normalise.nt69
-rw-r--r--test/normalise/test-normalise.ttl75
19 files changed, 499 insertions, 7 deletions
diff --git a/NEWS b/NEWS
index e913f2f7..a61b65b5 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,7 @@ serd (1.0.1) unstable;
* Add extensible logging API
* Add model for storing statements in memory
* Add option for writing terse output without newlines
+ * Add support for converting literals to canonical form
* Add support for parsing variables
* Add support for validation
* Add support for writing terse collections
diff --git a/doc/serdi.1 b/doc/serdi.1
index 888dab03..fcd58535 100644
--- a/doc/serdi.1
+++ b/doc/serdi.1
@@ -6,7 +6,7 @@
.Nd read and write RDF syntax
.Sh SYNOPSIS
.Nm serdi
-.Op Fl abefhlqv
+.Op Fl Cabefhlqv
.Op Fl I Ar base
.Op Fl c Ar prefix
.Op Fl i Ar syntax
@@ -43,6 +43,23 @@ the URI of the file is automatically used as the base URI.
This option can be used to override that,
or to provide a base URI for input from stdin or a string.
.Pp
+.It Fl C
+Convert literals to canonical form.
+Literals with supported XSD datatypes will be parsed and rewritten canonically.
+All numeric datatypes are supported, as well as
+.Vt boolean ,
+.Vt duration ,
.Vt dateTime ,
+.Vt time ,
+.Vt hexBinary ,
+and
+.Vt base64Binary .
+.Pp
+.It Fl S
+Stream model quickly without inlining.
+This only has an effect when a model is used, and disables searching and statement reordering for pretty printing.
+Statements will be written in simple sorted order, which is faster, but may result in less readable output in Turtle or TriG.
+.Pp
.It Fl V
Validate inputs.
All necessary data, including schemas, must be passed as inputs.
diff --git a/include/serd/serd.h b/include/serd/serd.h
index 93e280dc..8eb5470d 100644
--- a/include/serd/serd.h
+++ b/include/serd/serd.h
@@ -1462,6 +1462,32 @@ serd_sink_write_end(const SerdSink* SERD_NONNULL sink,
/**
@}
+ @defgroup serd_stream_processing Stream Processing
+ @{
+*/
+
+/// Flags that control canonical node transformation
+typedef enum {
+  SERD_CANON_LAX = 1u << 0u, ///< Tolerate and pass through invalid input
+} SerdCanonFlag;
+
+/// Bitwise OR of SerdCanonFlag values
+typedef uint32_t SerdCanonFlags;
+
+/**
+   Return a sink that transforms literals to canonical form where possible.
+
+   The returned sink acts like `target` in all respects, except literal nodes
+   in statements may be modified from the original.
+
+   @param world World used to log messages about invalid literals.
+   @param target Sink that receives the (possibly rewritten) statements and
+   all other events unchanged.
+   @param flags Bitwise OR of SerdCanonFlag values.
+*/
+SERD_API
+SerdSink* SERD_ALLOCATED
+serd_canon_new(const SerdWorld* SERD_NULLABLE world,
+               const SerdSink* SERD_NONNULL   target,
+               SerdCanonFlags                 flags);
+
+/**
+ @}
@defgroup serd_reader Reader
@{
*/
diff --git a/meson.build b/meson.build
index bc026f04..80917230 100644
--- a/meson.build
+++ b/meson.build
@@ -89,6 +89,7 @@ sources = [
'src/base64.c',
'src/byte_sink.c',
'src/byte_source.c',
+ 'src/canon.c',
'src/cursor.c',
'src/env.c',
'src/inserter.c',
diff --git a/src/canon.c b/src/canon.c
new file mode 100644
index 00000000..99351f9e
--- /dev/null
+++ b/src/canon.c
@@ -0,0 +1,194 @@
+/*
+ Copyright 2019-2020 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "cursor.h"
+#include "namespaces.h"
+#include "node.h"
+#include "statement.h"
+#include "string_utils.h"
+#include "world.h"
+
+#include "exess/exess.h"
+#include "serd/serd.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+/// State shared by the canon sink callbacks (allocated in serd_canon_new)
+typedef struct {
+ const SerdWorld* world;  ///< World used for logging invalid literals
+ const SerdSink* target;  ///< Sink that statements and events are forwarded to
+ SerdCanonFlags flags;    ///< Bitwise OR of SerdCanonFlag values
+} SerdCanonData;
+
+/**
+   Create a canonical version of a datatyped literal.
+
+   On return, `*out` is either a newly allocated node, or NULL if nothing was
+   produced.  NULL with a success status means the datatype is not one that
+   exess recognises, so the caller should pass the original node through.
+
+   @param out Set to the new node, or NULL.  A non-NULL node is owned by the
+   caller and must be freed with serd_node_free().
+   @param node Literal node that has a datatype.
+   @return The exess result.  On error the status is non-zero and `count` is
+   the count reported by exess (the caller uses it as an offset into the
+   literal), or zero if the failure did not come from parsing.
+*/
+static ExessResult
+make_canonical(SerdNode** const out, const SerdNode* const SERD_NONNULL node)
+{
+  *out = NULL;
+
+  const char* const     str      = serd_node_string(node);
+  const SerdNode* const datatype = serd_node_datatype(node);
+  ExessResult           r        = {EXESS_SUCCESS, 0};
+
+  if (serd_node_type(datatype) != SERD_URI) {
+    r.status = EXESS_BAD_VALUE;
+    return r;
+  }
+
+  const char* const datatype_uri = serd_node_string(datatype);
+  if (!strcmp(datatype_uri, NS_RDF "langString")) {
+    // Rewritten as a plain string node, which drops the datatype
+    *out = serd_new_string(serd_node_string_view(node));
+    return r;
+  }
+
+  const ExessDatatype value_type = exess_datatype_from_uri(datatype_uri);
+  if (value_type == EXESS_NOTHING) {
+    return r; // Unrecognised datatype: success with *out left NULL
+  }
+
+  // Bounded types are read into a variant, others are rewritten from text
+  const bool bounded = exess_datatype_is_bounded(value_type);
+
+  // Measure canonical form to know how much space to allocate for node
+  ExessVariant variant = exess_make_nothing(EXESS_SUCCESS);
+  if (bounded) {
+    r = exess_read_variant(&variant, value_type, str);
+    if (!r.status) {
+      r = exess_write_variant(variant, 0, NULL);
+    }
+  } else {
+    r = exess_write_canonical(str, value_type, 0, NULL);
+  }
+
+  if (r.status) {
+    return r;
+  }
+
+  // Allocate node
+  const size_t datatype_uri_len = serd_node_length(datatype);
+  const size_t len              = serd_node_pad_size(r.count);
+  const size_t total_len        = sizeof(SerdNode) + len + datatype_uri_len;
+
+  SerdNode* const result =
+    serd_node_malloc(total_len, SERD_HAS_DATATYPE, SERD_LITERAL);
+
+  if (!result) {
+    // Allocation failed: report an error rather than dereferencing NULL
+    r.status = EXESS_NO_SPACE;
+    r.count  = 0;
+    return r;
+  }
+
+  // Write canonical form directly into node
+  char* const buf = serd_node_buffer(result);
+  r = bounded ? exess_write_variant(variant, r.count + 1, buf)
+              : exess_write_canonical(str, value_type, r.count + 1, buf);
+
+  if (r.status) {
+    serd_node_free(result);
+    return r;
+  }
+
+  result->n_bytes = r.count;
+
+  // The datatype node is stored after the padded literal body
+  SerdNode* const datatype_node = result + 1 + (len / sizeof(SerdNode));
+  char* const     datatype_buf  = serd_node_buffer(datatype_node);
+
+  datatype_node->n_bytes = datatype_uri_len;
+  datatype_node->type    = SERD_URI;
+  memcpy(datatype_buf, datatype_uri, datatype_uri_len + 1);
+
+  *out = result;
+  return r;
+}
+
+/// Log an "invalid literal" message at `level`, located at `cursor` if given
+static void
+serd_canon_log_invalid(const SerdCanonData* const data,
+                       const SerdLogLevel         level,
+                       const SerdCursor* const    cursor,
+                       const ExessResult          result)
+{
+  serd_world_logf_internal(data->world,
+                           SERD_ERR_INVALID,
+                           level,
+                           cursor,
+                           "invalid literal (%s)\n",
+                           exess_strerror(result.status));
+}
+
+/**
+   Handle a statement, rewriting its object in canonical form if possible.
+
+   Statements whose object is not a datatyped literal are forwarded to the
+   target unchanged.  If the literal is invalid, a message is logged, and the
+   statement is either passed through as-is (with SERD_CANON_LAX) or rejected
+   with SERD_ERR_INVALID.
+*/
+static SerdStatus
+serd_canon_on_statement(SerdCanonData* data,
+                        SerdStatementFlags flags,
+                        const SerdStatement* statement)
+{
+  const SerdNode* const object = serd_statement_object(statement);
+
+  const bool is_typed_literal =
+    serd_node_type(object) == SERD_LITERAL && serd_node_datatype(object);
+
+  if (!is_typed_literal) {
+    return serd_sink_write_statement(data->target, flags, statement);
+  }
+
+  SerdNode*         canonical = NULL;
+  const ExessResult result    = make_canonical(&canonical, object);
+  if (result.status) {
+    const bool         lax = (data->flags & SERD_CANON_LAX);
+    const SerdLogLevel level =
+      lax ? SERD_LOG_LEVEL_WARNING : SERD_LOG_LEVEL_ERR;
+
+    const SerdCursor* const origin = statement->cursor;
+    if (origin) {
+      // Shift the column so it points at the offending spot in the literal
+      const SerdCursor where = {origin->file,
+                                origin->line,
+                                origin->col + 1 + (unsigned)result.count};
+
+      serd_canon_log_invalid(data, level, &where, result);
+    } else {
+      serd_canon_log_invalid(data, level, NULL, result);
+    }
+
+    if (!lax) {
+      return SERD_ERR_INVALID;
+    }
+  }
+
+  // Nothing was produced, so forward the original statement
+  if (!canonical) {
+    return serd_sink_write_statement(data->target, flags, statement);
+  }
+
+  // Emit the statement with the canonical object in place of the original
+  const SerdStatus st = serd_sink_write(data->target,
+                                        flags,
+                                        statement->nodes[0],
+                                        statement->nodes[1],
+                                        canonical,
+                                        statement->nodes[3]);
+
+  serd_node_free(canonical);
+  return st;
+}
+
+/// Event callback: canonicalise statements, forward everything else as-is
+static SerdStatus
+serd_canon_on_event(SerdCanonData* data, const SerdEvent* event)
+{
+  if (event->type != SERD_STATEMENT) {
+    return serd_sink_write_event(data->target, event);
+  }
+
+  return serd_canon_on_statement(
+    data, event->statement.flags, event->statement.statement);
+}
+
+/**
+   Create a sink that rewrites literals canonically before passing them on.
+
+   @param world World used to log messages about invalid literals.
+   @param target Sink that receives all events after processing.
+   @param flags Bitwise OR of SerdCanonFlag values.
+   @return A new sink, or NULL if the internal state can not be allocated.
+*/
+SerdSink*
+serd_canon_new(const SerdWorld*     world,
+               const SerdSink*      target,
+               const SerdCanonFlags flags)
+{
+  SerdCanonData* const data = (SerdCanonData*)calloc(1, sizeof(SerdCanonData));
+  if (!data) {
+    return NULL; // Out of memory: avoid writing through a NULL pointer
+  }
+
+  data->world  = world;
+  data->target = target;
+  data->flags  = flags;
+
+  // free is handed over with data, presumably as the handle's destructor
+  return serd_sink_new(data, (SerdEventFunc)serd_canon_on_event, free);
+}
diff --git a/src/node.c b/src/node.c
index 7d1177e4..0283b14f 100644
--- a/src/node.c
+++ b/src/node.c
@@ -48,7 +48,7 @@ typedef struct {
static SerdNode*
serd_new_from_uri(const SerdURIView uri, const SerdURIView base);
-static size_t
+size_t
serd_node_pad_size(const size_t n_bytes)
{
const size_t pad = sizeof(SerdNode) - (n_bytes + 2) % sizeof(SerdNode);
diff --git a/src/node.h b/src/node.h
index 5206b109..6470d939 100644
--- a/src/node.h
+++ b/src/node.h
@@ -62,6 +62,10 @@ void
serd_node_set(SerdNode* SERD_NONNULL* SERD_NONNULL dst,
const SerdNode* SERD_NONNULL src);
+SERD_CONST_FUNC
+size_t
+serd_node_pad_size(const size_t n_bytes);
+
SERD_PURE_FUNC
size_t
serd_node_total_size(const SerdNode* SERD_NONNULL node);
diff --git a/src/serdi.c b/src/serdi.c
index 70c52682..b028b862 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -55,6 +55,7 @@ print_usage(const char* name, bool error)
fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
fprintf(os, "Read and write RDF syntax.\n");
fprintf(os, "Use - for INPUT to read from standard input.\n\n");
+ fprintf(os, " -C Convert literals to canonical form.\n");
fprintf(os, " -I BASE_URI Input base URI.\n");
fprintf(os, " -V Validate inputs.\n");
fprintf(os, " -a Write ASCII output if possible.\n");
@@ -150,6 +151,7 @@ main(int argc, char** argv)
bool osyntax_set = false;
bool validate = false;
bool use_model = false;
+ bool normalise = false;
bool quiet = false;
size_t stack_size = 4194304;
const char* input_string = NULL;
@@ -163,7 +165,9 @@ main(int argc, char** argv)
break;
}
- if (argv[a][1] == 'I') {
+ if (argv[a][1] == 'C') {
+ normalise = true;
+ } else if (argv[a][1] == 'I') {
if (++a == argc) {
return missing_arg(argv[0], 'I');
}
@@ -316,7 +320,7 @@ main(int argc, char** argv)
SerdModel* model = NULL;
SerdSink* inserter = NULL;
- const SerdSink* sink = NULL;
+ const SerdSink* out_sink = NULL;
if (use_model) {
const SerdModelFlags flags =
SERD_INDEX_SPO | (input_has_graphs ? SERD_INDEX_GRAPHS : 0u) |
@@ -324,9 +328,16 @@ main(int argc, char** argv)
model = serd_model_new(world, flags);
inserter = serd_inserter_new(model, env, NULL);
- sink = inserter;
+ out_sink = inserter;
} else {
- sink = serd_writer_sink(writer);
+ out_sink = serd_writer_sink(writer);
+ }
+
+ const SerdSink* sink = out_sink;
+
+ SerdSink* canon = NULL;
+ if (normalise) {
+ sink = canon = serd_canon_new(world, out_sink, reader_flags);
}
if (quiet) {
@@ -410,6 +421,7 @@ main(int argc, char** argv)
serd_range_free(range);
}
+ serd_sink_free(canon);
serd_sink_free(inserter);
serd_model_free(model);
serd_writer_free(writer);
diff --git a/src/string_utils.h b/src/string_utils.h
index 0e9eee43..a302bc49 100644
--- a/src/string_utils.h
+++ b/src/string_utils.h
@@ -62,7 +62,7 @@ is_xdigit(const int c)
}
static inline bool
-is_space(const char c)
+is_space(const int c)
{
switch (c) {
case ' ':
diff --git a/test/meson.build b/test/meson.build
index 87b77334..d364b4f2 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -173,6 +173,13 @@ if get_option('utils')
suite: ['rdf', 'serd'],
timeout: 240)
+ manifest = files('normalise' / 'manifest.ttl')
+ base_uri = serd_base + 'normalise' + '/'
+ test('normalise', run_test_suite,
+ args: script_args + [manifest, base_uri, '--', '-C'],
+ suite: ['rdf', 'serd'],
+ timeout: 240)
+
### Run the lax suite with lax parsing enabled as well
manifest = files('lax/manifest.ttl')
base_uri = serd_base + 'lax/'
diff --git a/test/normalise/bad-boolean.ttl b/test/normalise/bad-boolean.ttl
new file mode 100644
index 00000000..c4fc3eb5
--- /dev/null
+++ b/test/normalise/bad-boolean.ttl
@@ -0,0 +1,5 @@
+@base <http://example.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[] <boolean> " ja "^^xsd:boolean .
+
diff --git a/test/normalise/bad-decimal-leading.ttl b/test/normalise/bad-decimal-leading.ttl
new file mode 100644
index 00000000..0d18eac7
--- /dev/null
+++ b/test/normalise/bad-decimal-leading.ttl
@@ -0,0 +1,4 @@
+@base <http://example.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[] <decimal> " junk 1234.5678 "^^xsd:decimal .
diff --git a/test/normalise/bad-decimal-trailing.ttl b/test/normalise/bad-decimal-trailing.ttl
new file mode 100644
index 00000000..10882ef5
--- /dev/null
+++ b/test/normalise/bad-decimal-trailing.ttl
@@ -0,0 +1,4 @@
+@base <http://example.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[] <decimal> " 1234.5678 junk "^^xsd:decimal .
diff --git a/test/normalise/bad-empty-boolean.ttl b/test/normalise/bad-empty-boolean.ttl
new file mode 100644
index 00000000..9a390c46
--- /dev/null
+++ b/test/normalise/bad-empty-boolean.ttl
@@ -0,0 +1,5 @@
+@base <http://example.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[] <boolean> ""^^xsd:boolean .
+
diff --git a/test/normalise/bad-integer-leading.ttl b/test/normalise/bad-integer-leading.ttl
new file mode 100644
index 00000000..80c1a6af
--- /dev/null
+++ b/test/normalise/bad-integer-leading.ttl
@@ -0,0 +1,4 @@
+@base <http://example.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[] <integer> " junk 987654321 "^^xsd:integer .
diff --git a/test/normalise/bad-integer-trailing.ttl b/test/normalise/bad-integer-trailing.ttl
new file mode 100644
index 00000000..a94a9ec4
--- /dev/null
+++ b/test/normalise/bad-integer-trailing.ttl
@@ -0,0 +1,4 @@
+@base <http://example.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[] <integer> " 987654321 junk "^^xsd:integer .
diff --git a/test/normalise/manifest.ttl b/test/normalise/manifest.ttl
new file mode 100644
index 00000000..a76f589d
--- /dev/null
+++ b/test/normalise/manifest.ttl
@@ -0,0 +1,60 @@
+@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix rdft: <http://www.w3.org/ns/rdftest#> .
+
+<>
+ rdf:type mf:Manifest ;
+ rdfs:comment "Serd normalisation test cases" ;
+ mf:entries (
+ <#bad-boolean>
+ <#bad-decimal-leading>
+ <#bad-decimal-trailing>
+ <#bad-empty-boolean>
+ <#bad-integer-leading>
+ <#bad-integer-trailing>
+ <#test-normalise>
+ ) .
+
+<#bad-boolean>
+ rdf:type rdft:TestTurtleNegativeEval ;
+ mf:name "bad-boolean" ;
+ rdfs:comment "Invalid xsd::boolean syntax" ;
+ mf:action <bad-boolean.ttl> .
+
+<#bad-decimal-leading>
+ rdf:type rdft:TestTurtleNegativeEval ;
+ mf:name "bad-decimal-leading" ;
+ rdfs:comment "Invalid xsd::decimal syntax (leading garbage)" ;
+ mf:action <bad-decimal-leading.ttl> .
+
+<#bad-decimal-trailing>
+ rdf:type rdft:TestTurtleNegativeEval ;
+ mf:name "bad-decimal-trailing" ;
+ rdfs:comment "Invalid xsd::decimal syntax (trailing garbage)" ;
+ mf:action <bad-decimal-trailing.ttl> .
+
+<#bad-empty-boolean>
+ rdf:type rdft:TestTurtleNegativeEval ;
+ mf:name "bad-empty-boolean" ;
+ rdfs:comment "Invalid xsd::boolean syntax (no value)" ;
+ mf:action <bad-empty-boolean.ttl> .
+
+<#bad-integer-leading>
+ rdf:type rdft:TestTurtleNegativeEval ;
+ mf:name "bad-integer-leading" ;
+ rdfs:comment "Invalid xsd::integer syntax (leading garbage)" ;
+ mf:action <bad-integer-leading.ttl> .
+
+<#bad-integer-trailing>
+ rdf:type rdft:TestTurtleNegativeEval ;
+ mf:name "bad-integer-trailing" ;
+ rdfs:comment "Invalid xsd::integer syntax (trailing garbage)" ;
+ mf:action <bad-integer-trailing.ttl> .
+
+<#test-normalise>
+ rdf:type rdft:TestTurtleEval ;
+ mf:name "test-normalise" ;
+ mf:action <test-normalise.ttl> ;
+ mf:result <test-normalise.nt> .
+
diff --git a/test/normalise/test-normalise.nt b/test/normalise/test-normalise.nt
new file mode 100644
index 00000000..ed32a059
--- /dev/null
+++ b/test/normalise/test-normalise.nt
@@ -0,0 +1,69 @@
+_:b1 <http://example.org/boolean> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> .
+_:b1 <http://example.org/boolean> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> .
+_:b1 <http://example.org/boolean> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> .
+_:b1 <http://example.org/boolean> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> .
+_:b1 <http://example.org/ieee754> "1.0E2"^^<http://www.w3.org/2001/XMLSchema#float> .
+_:b1 <http://example.org/ieee754> "-1.0E2"^^<http://www.w3.org/2001/XMLSchema#float> .
+_:b1 <http://example.org/ieee754> "1.0E3"^^<http://www.w3.org/2001/XMLSchema#double> .
+_:b1 <http://example.org/ieee754> "-1.0E3"^^<http://www.w3.org/2001/XMLSchema#double> .
+_:b1 <http://example.org/machine> "9223372036854775807"^^<http://www.w3.org/2001/XMLSchema#long> .
+_:b1 <http://example.org/machine> "-9223372036854775808"^^<http://www.w3.org/2001/XMLSchema#long> .
+_:b1 <http://example.org/machine> "2147483647"^^<http://www.w3.org/2001/XMLSchema#int> .
+_:b1 <http://example.org/machine> "-2147483648"^^<http://www.w3.org/2001/XMLSchema#int> .
+_:b1 <http://example.org/machine> "32767"^^<http://www.w3.org/2001/XMLSchema#short> .
+_:b1 <http://example.org/machine> "-32768"^^<http://www.w3.org/2001/XMLSchema#short> .
+_:b1 <http://example.org/machine> "127"^^<http://www.w3.org/2001/XMLSchema#byte> .
+_:b1 <http://example.org/machine> "-128"^^<http://www.w3.org/2001/XMLSchema#byte> .
+_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedLong> .
+_:b1 <http://example.org/machine> "18446744073709551615"^^<http://www.w3.org/2001/XMLSchema#unsignedLong> .
+_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedInt> .
+_:b1 <http://example.org/machine> "4294967295"^^<http://www.w3.org/2001/XMLSchema#unsignedInt> .
+_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedShort> .
+_:b1 <http://example.org/machine> "65535"^^<http://www.w3.org/2001/XMLSchema#unsignedShort> .
+_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedByte> .
+_:b1 <http://example.org/machine> "255"^^<http://www.w3.org/2001/XMLSchema#unsignedByte> .
+_:b1 <http://example.org/decimal> "0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "0"^^<http://www.w3.org/2001/XMLSchema#nonPositiveInteger> .
+_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#nonPositiveInteger> .
+_:b1 <http://example.org/integer> "-1"^^<http://www.w3.org/2001/XMLSchema#negativeInteger> .
+_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#negativeInteger> .
+_:b1 <http://example.org/integer> "0"^^<http://www.w3.org/2001/XMLSchema#nonNegativeInteger> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#nonNegativeInteger> .
+_:b1 <http://example.org/integer> "1"^^<http://www.w3.org/2001/XMLSchema#positiveInteger> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#positiveInteger> .
+_:b1 <http://example.org/langString> "no language tag" .
+_:b1 <http://example.org/time> "P1Y6M"^^<http://www.w3.org/2001/XMLSchema#duration> .
+_:b1 <http://example.org/time> "12:15:01Z"^^<http://www.w3.org/2001/XMLSchema#time> .
+_:b1 <http://example.org/time> "2004-04-12Z"^^<http://www.w3.org/2001/XMLSchema#date> .
+_:b1 <http://example.org/binary> "A1B7F080"^^<http://www.w3.org/2001/XMLSchema#hexBinary> .
+_:b1 <http://example.org/binary> "Zm9vYmF="^^<http://www.w3.org/2001/XMLSchema#base64Binary> .
+_:b1 <http://example.org/other> "untyped" .
+_:b1 <http://example.org/other> <http://example.org/uri> .
+_:b1 <http://example.org/other> "notxsd"^^<http://example.org/sometype> .
+_:b1 <http://example.org/other> "unsupported"^^<http://www.w3.org/2001/XMLSchema#name> .
diff --git a/test/normalise/test-normalise.ttl b/test/normalise/test-normalise.ttl
new file mode 100644
index 00000000..8cb3f0aa
--- /dev/null
+++ b/test/normalise/test-normalise.ttl
@@ -0,0 +1,75 @@
+@base <http://example.org/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[
+ <boolean> " false "^^xsd:boolean ,
+ " 0 "^^xsd:boolean ,
+ " true "^^xsd:boolean ,
+ " 1 "^^xsd:boolean ;
+ <ieee754> " +0100.0 "^^xsd:float ,
+ " -0100.0 "^^xsd:float ,
+ " +01000.0 "^^xsd:double ,
+ " -01000.0 "^^xsd:double ;
+ <machine> " +09223372036854775807 "^^xsd:long ,
+ " -09223372036854775808 "^^xsd:long ,
+ " +02147483647 "^^xsd:int ,
+ " -02147483648 "^^xsd:int ,
+ " +032767 "^^xsd:short ,
+ " -032768 "^^xsd:short ,
+ " +0127 "^^xsd:byte ,
+ " -0128 "^^xsd:byte ,
+ " 01 "^^xsd:unsignedLong ,
+ " 018446744073709551615 "^^xsd:unsignedLong ,
+ " 01 "^^xsd:unsignedInt ,
+ " 04294967295 "^^xsd:unsignedInt ,
+ " 01 "^^xsd:unsignedShort ,
+ " 065535 "^^xsd:unsignedShort ,
+ " 01 "^^xsd:unsignedByte ,
+ " 0255 "^^xsd:unsignedByte ;
+ <decimal> " 00 "^^xsd:decimal ,
+ " +0 "^^xsd:decimal ,
+ " -0 "^^xsd:decimal ,
+ " 36893488147419103232 "^^xsd:decimal ,
+ " 0036893488147419103232 "^^xsd:decimal ,
+ " +36893488147419103232 "^^xsd:decimal ,
+ " +0036893488147419103232 "^^xsd:decimal ,
+ " +0036893488147419103232. "^^xsd:decimal ,
+ " +0036893488147419103232.00 "^^xsd:decimal ,
+ " +0036893488147419103232.12300 "^^xsd:decimal ,
+ " -36893488147419103232 "^^xsd:decimal ,
+ " -0036893488147419103232 "^^xsd:decimal ,
+ " -0036893488147419103232. "^^xsd:decimal ,
+ " -0036893488147419103232.00 "^^xsd:decimal ,
+ " -0036893488147419103232.12300 "^^xsd:decimal ,
+ " 00.12300 "^^xsd:decimal ,
+ " .12300 "^^xsd:decimal ,
+ " +.12300 "^^xsd:decimal ,
+ " +00.12300 "^^xsd:decimal ,
+ " -.12300 "^^xsd:decimal ,
+ " -00.12300 "^^xsd:decimal ;
+ <integer> " 36893488147419103232 "^^xsd:integer ,
+ " 0036893488147419103232 "^^xsd:integer ,
+ " +36893488147419103232 "^^xsd:integer ,
+ " +0036893488147419103232 "^^xsd:integer ,
+ " -36893488147419103232 "^^xsd:integer ,
+ " -0036893488147419103232 "^^xsd:integer ,
+ " 00 "^^xsd:nonPositiveInteger ,
+ " -036893488147419103232 "^^xsd:nonPositiveInteger ,
+ " -01 "^^xsd:negativeInteger ,
+ " -036893488147419103232 "^^xsd:negativeInteger ,
+ " 00 "^^xsd:nonNegativeInteger ,
+ " 036893488147419103232 "^^xsd:nonNegativeInteger ,
+ " +01 "^^xsd:positiveInteger ,
+ " 036893488147419103232 "^^xsd:positiveInteger ;
+ <langString> "no language tag"^^rdf:langString ;
+ <time> " P1Y6M0D "^^xsd:duration ,
+ " 12:15:01+00:00 "^^xsd:time ,
+ " 02004-04-12+00:00 "^^xsd:date ;
+ <binary> "A 1 B7 F080"^^xsd:hexBinary ,
+ " Zm 9v Y m F="^^xsd:base64Binary ;
+ <other> "untyped" ,
+ <uri> ,
+ "notxsd"^^<sometype> ,
+ "unsupported"^^xsd:name
+] .