aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2021-05-30 12:23:07 -0400
committerDavid Robillard <d@drobilla.net>2022-01-14 19:37:51 -0500
commit89612ec05f596d135640413e093251fb9691ca14 (patch)
tree9c3e3d4491e6eb694aa0d429ba52ad80b0a5dc17
parentcab9b5621b4db600376090f28fdc76c7ac6bd728 (diff)
downloadserd-89612ec05f596d135640413e093251fb9691ca14.tar.gz
serd-89612ec05f596d135640413e093251fb9691ca14.tar.bz2
serd-89612ec05f596d135640413e093251fb9691ca14.zip
Add support for converting literals to canonical form
-rw-r--r--NEWS1
-rw-r--r--doc/serdi.114
-rw-r--r--include/serd/serd.h27
-rw-r--r--meson.build1
-rw-r--r--src/canon.c192
-rw-r--r--src/serdi.c20
-rw-r--r--src/string.c2
-rw-r--r--src/string_utils.h8
-rw-r--r--test/canon/bad-boolean.ttl5
-rw-r--r--test/canon/bad-decimal-leading.ttl4
-rw-r--r--test/canon/bad-decimal-trailing.ttl4
-rw-r--r--test/canon/bad-empty-boolean.ttl5
-rw-r--r--test/canon/bad-integer-leading.ttl4
-rw-r--r--test/canon/bad-integer-trailing.ttl4
-rw-r--r--test/canon/manifest.ttl58
-rw-r--r--test/canon/test-canon.nt70
-rw-r--r--test/canon/test-canon.ttl76
-rw-r--r--test/meson.build8
-rw-r--r--test/test_string.c2
19 files changed, 495 insertions, 10 deletions
diff --git a/NEWS b/NEWS
index 21dad0a1..dec4d930 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,7 @@ serd (1.0.1) unstable;
* Add extensible logging API
* Add model for storing statements in memory
* Add option for writing terse output without newlines
+ * Add support for converting literals to canonical form
* Add support for parsing variables
* Add support for writing terse collections
* Add support for xsd:float and xsd:double literals
diff --git a/doc/serdi.1 b/doc/serdi.1
index e9d70857..59eb67ce 100644
--- a/doc/serdi.1
+++ b/doc/serdi.1
@@ -6,7 +6,7 @@
.Nd read, transform, and write RDF data
.Sh SYNOPSIS
.Nm serdi
-.Op Fl abefhlmqtvx
+.Op Fl Cabefhlmqtvx
.Op Fl I Ar base
.Op Fl c Ar prefix
.Op Fl i Ar syntax
@@ -35,6 +35,18 @@ or transform URIs and blank node IDs.
The options are as follows:
.Pp
.Bl -tag -compact -width 3n
+.It Fl C
+Convert literals to canonical form.
+Literals with supported XSD datatypes will be parsed and rewritten canonically.
+All numeric datatypes are supported, as well as
+.Vt boolean ,
+.Vt duration ,
+.Vt dateTime ,
+.Vt time ,
+.Vt hexBinary ,
+and
+.Vt base64Binary .
+.Pp
.It Fl I Ar base
Input base URI.
Relative URI references in the input will be resolved against this.
diff --git a/include/serd/serd.h b/include/serd/serd.h
index efa48543..fad9d070 100644
--- a/include/serd/serd.h
+++ b/include/serd/serd.h
@@ -217,6 +217,7 @@ typedef enum {
SERD_ERR_BAD_CALL, ///< Invalid call
SERD_ERR_BAD_URI, ///< Invalid or unresolved URI
SERD_ERR_BAD_INDEX, ///< No optimal model index available
+ SERD_ERR_INVALID, ///< Invalid data
} SerdStatus;
/**
@@ -1939,6 +1940,32 @@ serd_sink_write_end(const SerdSink* SERD_NONNULL sink,
/**
@}
+ @defgroup serd_canon Canon
+ @{
+*/
+
+/// Flags that control canonical node transformation (see serd_canon_new())
+typedef enum {
+ SERD_CANON_LAX = 1u << 0u, ///< Tolerate and pass through invalid input (logged as a warning rather than failing)
+} SerdCanonFlag;
+
+/// Bitwise OR of SerdCanonFlag values, as passed to serd_canon_new()
+typedef uint32_t SerdCanonFlags;
+
+/**
+ Return a new sink that transforms literals to canonical form where possible.
+
+ The returned sink acts like `target` in all respects, except literal nodes
+ in statements may be modified from the original.
+
+ Only the object of a statement is considered.  Literals with a supported
+ datatype are rewritten in canonical form, and the language tag of tagged
+ literals is converted to lower case.  If a literal is invalid, an error is
+ logged and the statement is rejected with #SERD_ERR_INVALID, unless
+ #SERD_CANON_LAX is set, in which case a warning is logged and the statement
+ is passed through unchanged.  Events other than statements are forwarded to
+ `target` as-is.
+
+ @param world The world, used for logging invalid literals.
+ @param target The sink that receives the (possibly modified) events.
+ @param flags Bitwise OR of #SerdCanonFlag values.
+
+ @return A new sink, which should be freed with serd_sink_free().
+*/
+SERD_API
+SerdSink* SERD_ALLOCATED
+serd_canon_new(const SerdWorld* SERD_NULLABLE world,
+ const SerdSink* SERD_NONNULL target,
+ SerdCanonFlags flags);
+
+/**
+ @}
@defgroup serd_env Environment
@{
*/
diff --git a/meson.build b/meson.build
index 5197f448..25cce6d0 100644
--- a/meson.build
+++ b/meson.build
@@ -86,6 +86,7 @@ c_header = files('include/serd/serd.h')
sources = [
'src/byte_sink.c',
'src/byte_source.c',
+ 'src/canon.c',
'src/caret.c',
'src/compare.c',
'src/cursor.c',
diff --git a/src/canon.c b/src/canon.c
new file mode 100644
index 00000000..84d20d0c
--- /dev/null
+++ b/src/canon.c
@@ -0,0 +1,192 @@
+/*
+ Copyright 2019-2022 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "caret.h"
+#include "namespaces.h"
+#include "node.h"
+#include "statement.h"
+#include "string_utils.h"
+
+#include "exess/exess.h"
+#include "serd/serd.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef struct { // Handle of a canon sink, created by serd_canon_new()
+ const SerdWorld* world; ///< World used to log invalid literals
+ const SerdSink* target; ///< Sink that events are forwarded to
+ SerdCanonFlags flags; ///< Bitwise OR of SerdCanonFlag values
+} SerdCanonData;
+
+/**
+   Build a canonical copy of a literal that has a datatype.
+
+   On return, `*out` is either null or a newly allocated node which the caller
+   must free with serd_node_free().  It is left null when the datatype is not
+   one that exess recognises (the literal should then be passed through
+   as-is), when the value is not valid for its datatype, or when allocating
+   the new node fails.  A literal with datatype rdf:langString is rebuilt as a
+   string node without a datatype.
+
+   @param out Set to the new canonical node, or null.
+   @param node The literal to convert.
+   @param datatype The datatype node of `node`.
+
+   @return A result with a non-zero status on failure.  For an invalid value
+   this is the result of exess_write_canonical(); the caller treats its
+   `count` as the offset of the error within the literal.  Allocation failure
+   is reported as EXESS_NO_SPACE with a zero count.
+*/
+static ExessResult
+build_typed(SerdNode** const out,
+            const SerdNode* const SERD_NONNULL node,
+            const SerdNode* const SERD_NONNULL datatype)
+{
+  *out = NULL;
+
+  const char* str = serd_node_string(node);
+  const char* datatype_uri = serd_node_string(datatype);
+  ExessResult r = {EXESS_SUCCESS, 0};
+
+  if (!strcmp(datatype_uri, NS_RDF "langString")) {
+    *out = serd_new_string(serd_node_string_view(node));
+    return r;
+  }
+
+  const ExessDatatype value_type = exess_datatype_from_uri(datatype_uri);
+  if (value_type == EXESS_NOTHING) {
+    return r;
+  }
+
+  // Measure canonical form to know how much space to allocate for node
+  if ((r = exess_write_canonical(str, value_type, 0, NULL)).status) {
+    return r;
+  }
+
+  // Allocate node
+  const size_t datatype_uri_len = serd_node_length(datatype);
+  const size_t datatype_size = serd_node_total_size(datatype);
+  const size_t len = serd_node_pad_length(r.count);
+  const size_t total_len = sizeof(SerdNode) + len + datatype_size;
+  SerdNode* const result = serd_node_malloc(total_len);
+  if (!result) {
+    // Report the failure rather than writing through a null pointer
+    const ExessResult no_space = {EXESS_NO_SPACE, 0};
+    return no_space;
+  }
+
+  result->length = r.count;
+  result->flags = SERD_HAS_DATATYPE;
+  result->type = SERD_LITERAL;
+
+  // Write canonical form directly into node (with room for the terminator)
+  const ExessResult w = exess_write_canonical(
+    str, value_type, r.count + 1, serd_node_buffer(result));
+  if (w.status) {
+    // Should not happen after a successful measurement, but never return a
+    // half-written node
+    serd_node_free(result);
+    return w;
+  }
+
+  // The datatype node is stored immediately after the padded literal body
+  SerdNode* const datatype_node = result + 1 + (len / sizeof(SerdNode));
+  char* const datatype_buf = serd_node_buffer(datatype_node);
+
+  datatype_node->length = datatype_uri_len;
+  datatype_node->type = SERD_URI;
+  memcpy(datatype_buf, datatype_uri, datatype_uri_len + 1);
+
+  *out = result;
+  return r;
+}
+
+/**
+   Build a copy of a language-tagged literal with a lower-case language tag.
+
+   On success, `*out` is set to a new literal with the same string and flags
+   as `node`.  If the language tag is longer than MAX_LANG_LEN, `*out` is not
+   touched and EXESS_NO_SPACE is returned.  In both cases, the returned count
+   is the length of `node`.
+*/
+static ExessResult
+build_tagged(SerdNode** const out,
+             const SerdNode* const SERD_NONNULL node,
+             const SerdNode* const SERD_NONNULL language)
+{
+#define MAX_LANG_LEN 48 // RFC5646 requires 35, RFC4646 recommends 42
+
+  ExessResult result = {EXESS_SUCCESS, serd_node_length(node)};
+
+  const size_t tag_len = serd_node_length(language);
+  if (tag_len > MAX_LANG_LEN) {
+    result.status = EXESS_NO_SPACE;
+    return result;
+  }
+
+  // Lower-case the tag into a local buffer (length is passed explicitly)
+  const char* const tag = serd_node_string(language);
+  char lowered[MAX_LANG_LEN] = {0};
+  size_t i = 0u;
+  while (i < tag_len) {
+    lowered[i] = serd_to_lower(tag[i]);
+    ++i;
+  }
+
+  // The new literal differs from the original only in its language tag
+  *out = serd_new_literal(serd_node_string_view(node),
+                          serd_node_flags(node),
+                          SERD_SUBSTRING(lowered, tag_len));
+
+  return result;
+
+#undef MAX_LANG_LEN
+}
+
+/**
+   Handle a statement event.
+
+   Statements whose object has neither a datatype nor a language are forwarded
+   unchanged.  Otherwise, a canonical object is built and written in place of
+   the original.  If building fails, the problem is logged (as a warning in
+   lax mode, as an error otherwise), and the statement is either passed
+   through unchanged (lax) or rejected with SERD_ERR_INVALID (strict).
+*/
+static SerdStatus
+serd_canon_on_statement(SerdCanonData* const data,
+                        const SerdStatementFlags flags,
+                        const SerdStatement* const statement)
+{
+  const SerdNode* const obj = serd_statement_object(statement);
+  const SerdNode* const type = serd_node_datatype(obj);
+  const SerdNode* const lang = serd_node_language(obj);
+
+  SerdNode* canonical = NULL;
+  ExessResult result = {EXESS_SUCCESS, 0};
+  if (type) {
+    result = build_typed(&canonical, obj, type);
+  } else if (lang) {
+    result = build_tagged(&canonical, obj, lang);
+  } else {
+    // Nothing to canonicalise
+    return serd_sink_write_statement(data->target, flags, statement);
+  }
+
+  if (result.status) {
+    const bool strict = !(data->flags & SERD_CANON_LAX);
+    const SerdCaret* const origin = statement->caret;
+    SerdCaret where = {NULL, 0u, 0u};
+
+    if (origin) {
+      // Adjust column to point at the error within the literal
+      where.file = origin->file;
+      where.line = origin->line;
+      where.col = origin->col + 1 + (unsigned)result.count;
+    }
+
+    serd_logf_at(data->world,
+                 strict ? SERD_LOG_LEVEL_ERROR : SERD_LOG_LEVEL_WARNING,
+                 origin ? &where : NULL,
+                 "invalid literal (%s)",
+                 exess_strerror(result.status));
+
+    if (strict) {
+      return SERD_ERR_INVALID;
+    }
+  }
+
+  if (canonical) {
+    // Write the statement with the canonical node as its object
+    const SerdStatus st = serd_sink_write(data->target,
+                                          flags,
+                                          statement->nodes[0],
+                                          statement->nodes[1],
+                                          canonical,
+                                          statement->nodes[3]);
+    serd_node_free(canonical);
+    return st;
+  }
+
+  // No replacement was built, so pass the original through
+  return serd_sink_write_statement(data->target, flags, statement);
+}
+
+/// Event handler: canonicalise statements, forward everything else untouched
+static SerdStatus
+serd_canon_on_event(SerdCanonData* const data, const SerdEvent* const event)
+{
+  if (event->type != SERD_STATEMENT) {
+    return serd_sink_write_event(data->target, event);
+  }
+
+  return serd_canon_on_statement(
+    data, event->statement.flags, event->statement.statement);
+}
+
+/*
+  Create a canon sink that forwards to `target`.
+
+  The sink owns a SerdCanonData handle, which is released with free() when the
+  sink is freed.  Returns NULL if the handle can not be allocated.
+*/
+SerdSink*
+serd_canon_new(const SerdWorld* const world,
+               const SerdSink* const target,
+               const SerdCanonFlags flags)
+{
+  SerdCanonData* const data = (SerdCanonData*)calloc(1, sizeof(SerdCanonData));
+  if (!data) {
+    // Out of memory: fail cleanly instead of writing through a null pointer
+    return NULL;
+  }
+
+  data->world = world;
+  data->target = target;
+  data->flags = flags;
+
+  return serd_sink_new(data, (SerdEventFunc)serd_canon_on_event, free);
+}
diff --git a/src/serdi.c b/src/serdi.c
index 97601b83..243be98e 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -56,6 +56,7 @@ print_usage(const char* const name, const bool error)
fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
fprintf(os, "Read and write RDF syntax.\n");
fprintf(os, "Use - for INPUT to read from standard input.\n\n");
+ fprintf(os, " -C Convert literals to canonical form.\n");
fprintf(os, " -I BASE_URI Input base URI.\n");
fprintf(os, " -a Write ASCII output if possible.\n");
fprintf(os, " -b Fast bulk output for large serialisations.\n");
@@ -153,6 +154,7 @@ main(int argc, char** argv)
bool no_inline = false;
bool osyntax_set = false;
bool use_model = false;
+ bool canonical = false;
bool quiet = false;
size_t stack_size = 4194304;
const char* input_string = NULL;
@@ -169,7 +171,9 @@ main(int argc, char** argv)
for (int o = 1; argv[a][o]; ++o) {
const char opt = argv[a][o];
- if (opt == 'a') {
+ if (opt == 'C') {
+ canonical = true;
+ } else if (opt == 'a') {
writer_flags |= SERD_WRITE_ASCII;
} else if (opt == 'b') {
bulk_write = true;
@@ -337,7 +341,7 @@ main(int argc, char** argv)
SerdModel* model = NULL;
SerdSink* inserter = NULL;
- const SerdSink* sink = NULL;
+ const SerdSink* out_sink = NULL;
if (use_model) {
const SerdModelFlags flags = (input_has_graphs ? SERD_STORE_GRAPHS : 0u);
@@ -354,9 +358,16 @@ main(int argc, char** argv)
}
inserter = serd_inserter_new(model, NULL);
- sink = inserter;
+ out_sink = inserter;
} else {
- sink = serd_writer_sink(writer);
+ out_sink = serd_writer_sink(writer);
+ }
+
+ const SerdSink* sink = out_sink;
+
+ SerdSink* canon = NULL;
+ if (canonical) {
+ sink = canon = serd_canon_new(world, out_sink, reader_flags);
}
if (quiet) {
@@ -455,6 +466,7 @@ main(int argc, char** argv)
serd_cursor_free(everything);
}
+ serd_sink_free(canon);
serd_sink_free(inserter);
serd_model_free(model);
serd_writer_free(writer);
diff --git a/src/string.c b/src/string.c
index 11b53050..6942b7b6 100644
--- a/src/string.c
+++ b/src/string.c
@@ -62,6 +62,8 @@ serd_strerror(const SerdStatus status)
return "Invalid or unresolved URI";
case SERD_ERR_BAD_INDEX:
return "No optimal model index available";
+ case SERD_ERR_INVALID:
+ return "Invalid data";
}
return "Unknown error";
diff --git a/src/string_utils.h b/src/string_utils.h
index 54f7877c..5cf7ba8c 100644
--- a/src/string_utils.h
+++ b/src/string_utils.h
@@ -60,7 +60,7 @@ is_xdigit(const int c)
}
static inline bool
-is_space(const char c)
+is_space(const int c)
{
switch (c) {
case ' ':
@@ -89,16 +89,16 @@ is_windows_path(const char* path)
}
static inline char
-serd_to_upper(const char c)
+serd_to_lower(const char c)
{
- return (char)((c >= 'a' && c <= 'z') ? c - 32 : c);
+ return (char)((c >= 'A' && c <= 'Z') ? c + 32 : c);
}
static inline int
serd_strncasecmp(const char* s1, const char* s2, size_t n)
{
for (; n > 0 && *s2; s1++, s2++, --n) {
- if (serd_to_upper(*s1) != serd_to_upper(*s2)) {
+ if (serd_to_lower(*s1) != serd_to_lower(*s2)) {
return ((*(const uint8_t*)s1 < *(const uint8_t*)s2) ? -1 : +1);
}
}
diff --git a/test/canon/bad-boolean.ttl b/test/canon/bad-boolean.ttl
new file mode 100644
index 00000000..c4fc3eb5
--- /dev/null
+++ b/test/canon/bad-boolean.ttl
@@ -0,0 +1,5 @@
+@base <http://example.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[] <boolean> " ja "^^xsd:boolean .
+
diff --git a/test/canon/bad-decimal-leading.ttl b/test/canon/bad-decimal-leading.ttl
new file mode 100644
index 00000000..0d18eac7
--- /dev/null
+++ b/test/canon/bad-decimal-leading.ttl
@@ -0,0 +1,4 @@
+@base <http://example.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[] <decimal> " junk 1234.5678 "^^xsd:decimal .
diff --git a/test/canon/bad-decimal-trailing.ttl b/test/canon/bad-decimal-trailing.ttl
new file mode 100644
index 00000000..10882ef5
--- /dev/null
+++ b/test/canon/bad-decimal-trailing.ttl
@@ -0,0 +1,4 @@
+@base <http://example.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[] <decimal> " 1234.5678 junk "^^xsd:decimal .
diff --git a/test/canon/bad-empty-boolean.ttl b/test/canon/bad-empty-boolean.ttl
new file mode 100644
index 00000000..9a390c46
--- /dev/null
+++ b/test/canon/bad-empty-boolean.ttl
@@ -0,0 +1,5 @@
+@base <http://example.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[] <boolean> ""^^xsd:boolean .
+
diff --git a/test/canon/bad-integer-leading.ttl b/test/canon/bad-integer-leading.ttl
new file mode 100644
index 00000000..80c1a6af
--- /dev/null
+++ b/test/canon/bad-integer-leading.ttl
@@ -0,0 +1,4 @@
+@base <http://example.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[] <integer> " junk 987654321 "^^xsd:integer .
diff --git a/test/canon/bad-integer-trailing.ttl b/test/canon/bad-integer-trailing.ttl
new file mode 100644
index 00000000..a94a9ec4
--- /dev/null
+++ b/test/canon/bad-integer-trailing.ttl
@@ -0,0 +1,4 @@
+@base <http://example.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[] <integer> " 987654321 junk "^^xsd:integer .
diff --git a/test/canon/manifest.ttl b/test/canon/manifest.ttl
new file mode 100644
index 00000000..e2195212
--- /dev/null
+++ b/test/canon/manifest.ttl
@@ -0,0 +1,58 @@
+@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix rdft: <http://www.w3.org/ns/rdftest#> .
+
+<>
+ a mf:Manifest ;
+ rdfs:comment "Serd canonical literal test cases" ;
+ mf:entries (
+ <#bad-boolean>
+ <#bad-decimal-leading>
+ <#bad-decimal-trailing>
+ <#bad-empty-boolean>
+ <#bad-integer-leading>
+ <#bad-integer-trailing>
+ <#test-canon>
+ ) .
+
+<#bad-boolean>
+ a rdft:TestTurtleNegativeEval ;
+ mf:name "bad-boolean" ;
+ rdfs:comment "Invalid xsd:boolean syntax" ;
+ mf:action <bad-boolean.ttl> .
+
+<#bad-decimal-leading>
+ a rdft:TestTurtleNegativeEval ;
+ mf:name "bad-decimal-leading" ;
+ rdfs:comment "Invalid xsd:decimal syntax (leading garbage)" ;
+ mf:action <bad-decimal-leading.ttl> .
+
+<#bad-decimal-trailing>
+ a rdft:TestTurtleNegativeEval ;
+ mf:name "bad-decimal-trailing" ;
+ rdfs:comment "Invalid xsd:decimal syntax (trailing garbage)" ;
+ mf:action <bad-decimal-trailing.ttl> .
+
+<#bad-empty-boolean>
+ a rdft:TestTurtleNegativeEval ;
+ mf:name "bad-empty-boolean" ;
+ rdfs:comment "Invalid xsd:boolean syntax (no value)" ;
+ mf:action <bad-empty-boolean.ttl> .
+
+<#bad-integer-leading>
+ a rdft:TestTurtleNegativeEval ;
+ mf:name "bad-integer-leading" ;
+ rdfs:comment "Invalid xsd:integer syntax (leading garbage)" ;
+ mf:action <bad-integer-leading.ttl> .
+
+<#bad-integer-trailing>
+ a rdft:TestTurtleNegativeEval ;
+ mf:name "bad-integer-trailing" ;
+ rdfs:comment "Invalid xsd:integer syntax (trailing garbage)" ;
+ mf:action <bad-integer-trailing.ttl> .
+
+<#test-canon>
+ a rdft:TestTurtleEval ;
+ mf:name "test-canon" ;
+ mf:action <test-canon.ttl> ;
+ mf:result <test-canon.nt> .
diff --git a/test/canon/test-canon.nt b/test/canon/test-canon.nt
new file mode 100644
index 00000000..ff492890
--- /dev/null
+++ b/test/canon/test-canon.nt
@@ -0,0 +1,70 @@
+_:b1 <http://example.org/boolean> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> .
+_:b1 <http://example.org/boolean> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> .
+_:b1 <http://example.org/boolean> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> .
+_:b1 <http://example.org/boolean> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> .
+_:b1 <http://example.org/ieee754> "1.0E2"^^<http://www.w3.org/2001/XMLSchema#float> .
+_:b1 <http://example.org/ieee754> "-1.0E2"^^<http://www.w3.org/2001/XMLSchema#float> .
+_:b1 <http://example.org/ieee754> "1.0E3"^^<http://www.w3.org/2001/XMLSchema#double> .
+_:b1 <http://example.org/ieee754> "-1.0E3"^^<http://www.w3.org/2001/XMLSchema#double> .
+_:b1 <http://example.org/machine> "9223372036854775807"^^<http://www.w3.org/2001/XMLSchema#long> .
+_:b1 <http://example.org/machine> "-9223372036854775808"^^<http://www.w3.org/2001/XMLSchema#long> .
+_:b1 <http://example.org/machine> "2147483647"^^<http://www.w3.org/2001/XMLSchema#int> .
+_:b1 <http://example.org/machine> "-2147483648"^^<http://www.w3.org/2001/XMLSchema#int> .
+_:b1 <http://example.org/machine> "32767"^^<http://www.w3.org/2001/XMLSchema#short> .
+_:b1 <http://example.org/machine> "-32768"^^<http://www.w3.org/2001/XMLSchema#short> .
+_:b1 <http://example.org/machine> "127"^^<http://www.w3.org/2001/XMLSchema#byte> .
+_:b1 <http://example.org/machine> "-128"^^<http://www.w3.org/2001/XMLSchema#byte> .
+_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedLong> .
+_:b1 <http://example.org/machine> "18446744073709551615"^^<http://www.w3.org/2001/XMLSchema#unsignedLong> .
+_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedInt> .
+_:b1 <http://example.org/machine> "4294967295"^^<http://www.w3.org/2001/XMLSchema#unsignedInt> .
+_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedShort> .
+_:b1 <http://example.org/machine> "65535"^^<http://www.w3.org/2001/XMLSchema#unsignedShort> .
+_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedByte> .
+_:b1 <http://example.org/machine> "255"^^<http://www.w3.org/2001/XMLSchema#unsignedByte> .
+_:b1 <http://example.org/decimal> "0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "36893488147419103232.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-36893488147419103232.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/decimal> "-0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> .
+_:b1 <http://example.org/integer> "0"^^<http://www.w3.org/2001/XMLSchema#nonPositiveInteger> .
+_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#nonPositiveInteger> .
+_:b1 <http://example.org/integer> "-1"^^<http://www.w3.org/2001/XMLSchema#negativeInteger> .
+_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#negativeInteger> .
+_:b1 <http://example.org/integer> "0"^^<http://www.w3.org/2001/XMLSchema#nonNegativeInteger> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#nonNegativeInteger> .
+_:b1 <http://example.org/integer> "1"^^<http://www.w3.org/2001/XMLSchema#positiveInteger> .
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#positiveInteger> .
+_:b1 <http://example.org/langString> "no language tag" .
+_:b1 <http://example.org/taggedString> "english"@en-ca .
+_:b1 <http://example.org/time> "P1Y6M"^^<http://www.w3.org/2001/XMLSchema#duration> .
+_:b1 <http://example.org/time> "12:15:01Z"^^<http://www.w3.org/2001/XMLSchema#time> .
+_:b1 <http://example.org/time> "2004-04-12Z"^^<http://www.w3.org/2001/XMLSchema#date> .
+_:b1 <http://example.org/binary> "A1B7F080"^^<http://www.w3.org/2001/XMLSchema#hexBinary> .
+_:b1 <http://example.org/binary> "Zm9vYmF="^^<http://www.w3.org/2001/XMLSchema#base64Binary> .
+_:b1 <http://example.org/other> "untyped" .
+_:b1 <http://example.org/other> <http://example.org/uri> .
+_:b1 <http://example.org/other> "notxsd"^^<http://example.org/sometype> .
+_:b1 <http://example.org/other> "unsupported"^^<http://www.w3.org/2001/XMLSchema#name> .
diff --git a/test/canon/test-canon.ttl b/test/canon/test-canon.ttl
new file mode 100644
index 00000000..0d0b4682
--- /dev/null
+++ b/test/canon/test-canon.ttl
@@ -0,0 +1,76 @@
+@base <http://example.org/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[
+ <boolean> " false "^^xsd:boolean ,
+ " 0 "^^xsd:boolean ,
+ " true "^^xsd:boolean ,
+ " 1 "^^xsd:boolean ;
+ <ieee754> " +0100.0 "^^xsd:float ,
+ " -0100.0 "^^xsd:float ,
+ " +01000.0 "^^xsd:double ,
+ " -01000.0 "^^xsd:double ;
+ <machine> " +09223372036854775807 "^^xsd:long ,
+ " -09223372036854775808 "^^xsd:long ,
+ " +02147483647 "^^xsd:int ,
+ " -02147483648 "^^xsd:int ,
+ " +032767 "^^xsd:short ,
+ " -032768 "^^xsd:short ,
+ " +0127 "^^xsd:byte ,
+ " -0128 "^^xsd:byte ,
+ " 01 "^^xsd:unsignedLong ,
+ " 018446744073709551615 "^^xsd:unsignedLong ,
+ " 01 "^^xsd:unsignedInt ,
+ " 04294967295 "^^xsd:unsignedInt ,
+ " 01 "^^xsd:unsignedShort ,
+ " 065535 "^^xsd:unsignedShort ,
+ " 01 "^^xsd:unsignedByte ,
+ " 0255 "^^xsd:unsignedByte ;
+ <decimal> " 00 "^^xsd:decimal ,
+ " +0 "^^xsd:decimal ,
+ " -0 "^^xsd:decimal ,
+ " 36893488147419103232 "^^xsd:decimal ,
+ " 0036893488147419103232 "^^xsd:decimal ,
+ " +36893488147419103232 "^^xsd:decimal ,
+ " +0036893488147419103232 "^^xsd:decimal ,
+ " +0036893488147419103232. "^^xsd:decimal ,
+ " +0036893488147419103232.00 "^^xsd:decimal ,
+ " +0036893488147419103232.12300 "^^xsd:decimal ,
+ " -36893488147419103232 "^^xsd:decimal ,
+ " -0036893488147419103232 "^^xsd:decimal ,
+ " -0036893488147419103232. "^^xsd:decimal ,
+ " -0036893488147419103232.00 "^^xsd:decimal ,
+ " -0036893488147419103232.12300 "^^xsd:decimal ,
+ " 00.12300 "^^xsd:decimal ,
+ " .12300 "^^xsd:decimal ,
+ " +.12300 "^^xsd:decimal ,
+ " +00.12300 "^^xsd:decimal ,
+ " -.12300 "^^xsd:decimal ,
+ " -00.12300 "^^xsd:decimal ;
+ <integer> " 36893488147419103232 "^^xsd:integer ,
+ " 0036893488147419103232 "^^xsd:integer ,
+ " +36893488147419103232 "^^xsd:integer ,
+ " +0036893488147419103232 "^^xsd:integer ,
+ " -36893488147419103232 "^^xsd:integer ,
+ " -0036893488147419103232 "^^xsd:integer ,
+ " 00 "^^xsd:nonPositiveInteger ,
+ " -036893488147419103232 "^^xsd:nonPositiveInteger ,
+ " -01 "^^xsd:negativeInteger ,
+ " -036893488147419103232 "^^xsd:negativeInteger ,
+ " 00 "^^xsd:nonNegativeInteger ,
+ " 036893488147419103232 "^^xsd:nonNegativeInteger ,
+ " +01 "^^xsd:positiveInteger ,
+ " 036893488147419103232 "^^xsd:positiveInteger ;
+ <langString> "no language tag"^^rdf:langString ;
+ <taggedString> "english"@EN-CA ;
+ <time> " P1Y6M0D "^^xsd:duration ,
+ " 12:15:01+00:00 "^^xsd:time ,
+ " 02004-04-12+00:00 "^^xsd:date ;
+ <binary> "A 1 B7 F080"^^xsd:hexBinary ,
+ " Zm 9v Y m F="^^xsd:base64Binary ;
+ <other> "untyped" ,
+ <uri> ,
+ "notxsd"^^<sometype> ,
+ "unsupported"^^xsd:name
+] .
diff --git a/test/meson.build b/test/meson.build
index c4dc4b3b..e8392559 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -249,6 +249,14 @@ if get_option('utils')
suite: ['rdf', 'serd'],
timeout: 240)
+ manifest = files('canon' / 'manifest.ttl')
+ base_uri = serd_base + 'canon' + '/'
+ test('canon', run_test_suite,
+ args: script_args + [manifest, base_uri, '--', '-C'],
+ env: test_env,
+ suite: ['rdf', 'serd'],
+ timeout: 240)
+
### The lax suite is special because it is run twice...
lax_manifest = files('lax/manifest.ttl')
lax_base_uri = serd_base + name + '/'
diff --git a/test/test_string.c b/test/test_string.c
index 2bd47680..f8534001 100644
--- a/test/test_string.c
+++ b/test/test_string.c
@@ -32,7 +32,7 @@ test_strerror(void)
{
const char* msg = serd_strerror(SERD_SUCCESS);
assert(!strcmp(msg, "Success"));
- for (int i = SERD_FAILURE; i <= SERD_ERR_BAD_INDEX; ++i) {
+ for (int i = SERD_FAILURE; i <= SERD_ERR_INVALID; ++i) {
msg = serd_strerror((SerdStatus)i);
assert(strcmp(msg, "Success"));
}