diff options
author | David Robillard <d@drobilla.net> | 2019-10-14 23:26:41 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2021-03-08 23:36:08 -0500 |
commit | 1fd33e0a85bdf6bcc4f8138940462c4a4a391175 (patch) | |
tree | fab500d072049701370f45c5ae2af76683d8fd1f | |
parent | 7efaf2431b9c1a96f6ed2e28626aff4886efc749 (diff) | |
download | serd-1fd33e0a85bdf6bcc4f8138940462c4a4a391175.tar.gz serd-1fd33e0a85bdf6bcc4f8138940462c4a4a391175.tar.bz2 serd-1fd33e0a85bdf6bcc4f8138940462c4a4a391175.zip |
Add support for converting literals to canonical form
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | doc/serdi.1 | 19 | ||||
-rw-r--r-- | include/serd/serd.h | 26 | ||||
-rw-r--r-- | meson.build | 1 | ||||
-rw-r--r-- | src/canon.c | 194 | ||||
-rw-r--r-- | src/node.c | 2 | ||||
-rw-r--r-- | src/node.h | 4 | ||||
-rw-r--r-- | src/serdi.c | 20 | ||||
-rw-r--r-- | src/string_utils.h | 2 | ||||
-rw-r--r-- | test/meson.build | 7 | ||||
-rw-r--r-- | test/normalise/bad-boolean.ttl | 5 | ||||
-rw-r--r-- | test/normalise/bad-decimal-leading.ttl | 4 | ||||
-rw-r--r-- | test/normalise/bad-decimal-trailing.ttl | 4 | ||||
-rw-r--r-- | test/normalise/bad-empty-boolean.ttl | 5 | ||||
-rw-r--r-- | test/normalise/bad-integer-leading.ttl | 4 | ||||
-rw-r--r-- | test/normalise/bad-integer-trailing.ttl | 4 | ||||
-rw-r--r-- | test/normalise/manifest.ttl | 60 | ||||
-rw-r--r-- | test/normalise/test-normalise.nt | 69 | ||||
-rw-r--r-- | test/normalise/test-normalise.ttl | 75 |
19 files changed, 499 insertions, 7 deletions
@@ -5,6 +5,7 @@ serd (1.0.1) unstable; * Add extensible logging API * Add model for storing statements in memory * Add option for writing terse output without newlines + * Add support for converting literals to canonical form * Add support for parsing variables * Add support for validation * Add support for writing terse collections diff --git a/doc/serdi.1 b/doc/serdi.1 index 888dab03..fcd58535 100644 --- a/doc/serdi.1 +++ b/doc/serdi.1 @@ -6,7 +6,7 @@ .Nd read and write RDF syntax .Sh SYNOPSIS .Nm serdi -.Op Fl abefhlqv +.Op Fl Cabefhlqv .Op Fl I Ar base .Op Fl c Ar prefix .Op Fl i Ar syntax @@ -43,6 +43,23 @@ the URI of the file is automatically used as the base URI. This option can be used to override that, or to provide a base URI for input from stdin or a string. .Pp +.It Fl C +Convert literals to canonical form. +Literals with supported XSD datatypes will be parsed and rewritten canonically. +All numeric datatypes are supported, as well as +.Vt boolean , +.Vt duration , +.Vt dateTime , +.Vt time , +.Vt hexBinary , +and +.Vt base64Binary . +.Pp +.It Fl S +Stream model quickly without inlining. +This only has an effect when a model is used, and disables searching and statement reordering for pretty printing. +Statements will be written in simple sorted order, which is faster, but may result in less readable output in Turtle or TriG. +.Pp .It Fl V Validate inputs. All necessary data, including schemas, must be passed as inputs. 
diff --git a/include/serd/serd.h b/include/serd/serd.h index 93e280dc..8eb5470d 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -1462,6 +1462,32 @@ serd_sink_write_end(const SerdSink* SERD_NONNULL sink, /** @} + @defgroup serd_stream_processing Stream Processing + @{ +*/ + +/// Flags that control canonical node transformation +typedef enum { + SERD_CANON_LAX = 1u << 0u, ///< Tolerate and pass through invalid input +} SerdCanonFlag; + +/// Bitwise OR of SerdCanonFlag values +typedef uint32_t SerdCanonFlags; + +/** + Return a sink that transforms literals to canonical form where possible. + + The returned sink acts like `target` in all respects, except literal nodes + in statements may be modified from the original. +*/ +SERD_API +SerdSink* SERD_ALLOCATED +serd_canon_new(const SerdWorld* SERD_NULLABLE world, + const SerdSink* SERD_NONNULL target, + SerdCanonFlags flags); + +/** + @} @defgroup serd_reader Reader @{ */ diff --git a/meson.build b/meson.build index bc026f04..80917230 100644 --- a/meson.build +++ b/meson.build @@ -89,6 +89,7 @@ sources = [ 'src/base64.c', 'src/byte_sink.c', 'src/byte_source.c', + 'src/canon.c', 'src/cursor.c', 'src/env.c', 'src/inserter.c', diff --git a/src/canon.c b/src/canon.c new file mode 100644 index 00000000..99351f9e --- /dev/null +++ b/src/canon.c @@ -0,0 +1,194 @@ +/* + Copyright 2019-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "cursor.h" +#include "namespaces.h" +#include "node.h" +#include "statement.h" +#include "string_utils.h" +#include "world.h" + +#include "exess/exess.h" +#include "serd/serd.h" + +#include <assert.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +typedef struct { + const SerdWorld* world; + const SerdSink* target; + SerdCanonFlags flags; +} SerdCanonData; + +static ExessResult +make_canonical(SerdNode** const out, const SerdNode* const SERD_NONNULL node) +{ + *out = NULL; + + const char* str = serd_node_string(node); + const SerdNode* datatype = serd_node_datatype(node); + ExessResult r = {EXESS_SUCCESS, 0}; + + if (serd_node_type(datatype) != SERD_URI) { + r.status = EXESS_BAD_VALUE; + return r; + } + + const char* datatype_uri = serd_node_string(datatype); + if (!strcmp(datatype_uri, NS_RDF "langString")) { + *out = serd_new_string(serd_node_string_view(node)); + return r; + } + + const ExessDatatype value_type = exess_datatype_from_uri(datatype_uri); + if (value_type == EXESS_NOTHING) { + return r; + } + + // Measure canonical form to know how much space to allocate for node + ExessVariant variant = exess_make_nothing(EXESS_SUCCESS); + if (exess_datatype_is_bounded(value_type)) { + r = exess_read_variant(&variant, value_type, str); + if (!r.status) { + r = exess_write_variant(variant, 0, NULL); + } + } else { + r = exess_write_canonical(str, value_type, 0, NULL); + } + + if (r.status) { + return r; + } + + // Allocate node + const size_t datatype_uri_len = serd_node_length(datatype); + const size_t len = serd_node_pad_size(r.count); + const size_t total_len = sizeof(SerdNode) + len + datatype_uri_len; + + 
SerdNode* const result = + serd_node_malloc(total_len, SERD_HAS_DATATYPE, SERD_LITERAL); + + // Write canonical form directly into node + char* buf = serd_node_buffer(result); + if (exess_datatype_is_bounded(value_type)) { + r = exess_write_variant(variant, r.count + 1, buf); + result->n_bytes = r.count; + } else { + r = exess_write_canonical(str, value_type, r.count + 1, buf); + result->n_bytes = r.count; + } + + if (r.status) { + serd_node_free(result); + return r; + } + + SerdNode* const datatype_node = result + 1 + (len / sizeof(SerdNode)); + char* const datatype_buf = serd_node_buffer(datatype_node); + + datatype_node->n_bytes = datatype_uri_len; + datatype_node->type = SERD_URI; + memcpy(datatype_buf, datatype_uri, datatype_uri_len + 1); + + /* serd_node_check_padding(datatype_node); */ + /* serd_node_check_padding(result); */ + + *out = result; + return r; +} + +static SerdStatus +serd_canon_on_statement(SerdCanonData* data, + SerdStatementFlags flags, + const SerdStatement* statement) +{ + const SerdNode* object = serd_statement_object(statement); + if (serd_node_type(object) != SERD_LITERAL || !serd_node_datatype(object)) { + return serd_sink_write_statement(data->target, flags, statement); + } + + SerdNode* normo = NULL; + ExessResult r = make_canonical(&normo, object); + if (r.status) { + const bool lax = (data->flags & SERD_CANON_LAX); + const SerdLogLevel level = + lax ? 
SERD_LOG_LEVEL_WARNING : SERD_LOG_LEVEL_ERR; + + if (statement->cursor) { + // Adjust column to point at the exact error location in the literal + const SerdCursor cursor = {statement->cursor->file, + statement->cursor->line, + statement->cursor->col + 1 + + (unsigned)r.count}; + + serd_world_logf_internal(data->world, + SERD_ERR_INVALID, + level, + &cursor, + "invalid literal (%s)\n", + exess_strerror(r.status)); + } else { + serd_world_logf_internal(data->world, + SERD_ERR_INVALID, + level, + NULL, + "invalid literal (%s)\n", + exess_strerror(r.status)); + } + + if (!lax) { + return SERD_ERR_INVALID; + } + } + + if (!normo) { + return serd_sink_write_statement(data->target, flags, statement); + } + + const SerdStatus st = serd_sink_write(data->target, + flags, + statement->nodes[0], + statement->nodes[1], + normo, + statement->nodes[3]); + serd_node_free(normo); + return st; +} + +static SerdStatus +serd_canon_on_event(SerdCanonData* data, const SerdEvent* event) +{ + return (event->type == SERD_STATEMENT) + ? 
serd_canon_on_statement( + data, event->statement.flags, event->statement.statement) + : serd_sink_write_event(data->target, event); +} + +SerdSink* +serd_canon_new(const SerdWorld* world, + const SerdSink* target, + const SerdCanonFlags flags) +{ + SerdCanonData* const data = (SerdCanonData*)calloc(1, sizeof(SerdCanonData)); + + data->world = world; + data->target = target; + data->flags = flags; + + return serd_sink_new(data, (SerdEventFunc)serd_canon_on_event, free); +} @@ -48,7 +48,7 @@ typedef struct { static SerdNode* serd_new_from_uri(const SerdURIView uri, const SerdURIView base); -static size_t +size_t serd_node_pad_size(const size_t n_bytes) { const size_t pad = sizeof(SerdNode) - (n_bytes + 2) % sizeof(SerdNode); @@ -62,6 +62,10 @@ void serd_node_set(SerdNode* SERD_NONNULL* SERD_NONNULL dst, const SerdNode* SERD_NONNULL src); +SERD_CONST_FUNC +size_t +serd_node_pad_size(const size_t n_bytes); + SERD_PURE_FUNC size_t serd_node_total_size(const SerdNode* SERD_NONNULL node); diff --git a/src/serdi.c b/src/serdi.c index 70c52682..b028b862 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -55,6 +55,7 @@ print_usage(const char* name, bool error) fprintf(os, "Usage: %s [OPTION]... 
INPUT...\n", name); fprintf(os, "Read and write RDF syntax.\n"); fprintf(os, "Use - for INPUT to read from standard input.\n\n"); + fprintf(os, " -C Convert literals to canonical form.\n"); fprintf(os, " -I BASE_URI Input base URI.\n"); fprintf(os, " -V Validate inputs.\n"); fprintf(os, " -a Write ASCII output if possible.\n"); @@ -150,6 +151,7 @@ main(int argc, char** argv) bool osyntax_set = false; bool validate = false; bool use_model = false; + bool normalise = false; bool quiet = false; size_t stack_size = 4194304; const char* input_string = NULL; @@ -163,7 +165,9 @@ main(int argc, char** argv) break; } - if (argv[a][1] == 'I') { + if (argv[a][1] == 'C') { + normalise = true; + } else if (argv[a][1] == 'I') { if (++a == argc) { return missing_arg(argv[0], 'I'); } @@ -316,7 +320,7 @@ main(int argc, char** argv) SerdModel* model = NULL; SerdSink* inserter = NULL; - const SerdSink* sink = NULL; + const SerdSink* out_sink = NULL; if (use_model) { const SerdModelFlags flags = SERD_INDEX_SPO | (input_has_graphs ? 
SERD_INDEX_GRAPHS : 0u) | @@ -324,9 +328,16 @@ main(int argc, char** argv) model = serd_model_new(world, flags); inserter = serd_inserter_new(model, env, NULL); - sink = inserter; + out_sink = inserter; } else { - sink = serd_writer_sink(writer); + out_sink = serd_writer_sink(writer); + } + + const SerdSink* sink = out_sink; + + SerdSink* canon = NULL; + if (normalise) { + sink = canon = serd_canon_new(world, out_sink, reader_flags); } if (quiet) { @@ -410,6 +421,7 @@ main(int argc, char** argv) serd_range_free(range); } + serd_sink_free(canon); serd_sink_free(inserter); serd_model_free(model); serd_writer_free(writer); diff --git a/src/string_utils.h b/src/string_utils.h index 0e9eee43..a302bc49 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -62,7 +62,7 @@ is_xdigit(const int c) } static inline bool -is_space(const char c) +is_space(const int c) { switch (c) { case ' ': diff --git a/test/meson.build b/test/meson.build index 87b77334..d364b4f2 100644 --- a/test/meson.build +++ b/test/meson.build @@ -173,6 +173,13 @@ if get_option('utils') suite: ['rdf', 'serd'], timeout: 240) + manifest = files('normalise' / 'manifest.ttl') + base_uri = serd_base + 'normalise' + '/' + test('normalise', run_test_suite, + args: script_args + [manifest, base_uri, '--', '-C'], + suite: ['rdf', 'serd'], + timeout: 240) + ### Run the lax suite with lax parsing enabled as well manifest = files('lax/manifest.ttl') base_uri = serd_base + 'lax/' diff --git a/test/normalise/bad-boolean.ttl b/test/normalise/bad-boolean.ttl new file mode 100644 index 00000000..c4fc3eb5 --- /dev/null +++ b/test/normalise/bad-boolean.ttl @@ -0,0 +1,5 @@ +@base <http://example.org/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +[] <boolean> " ja "^^xsd:boolean . 
+ diff --git a/test/normalise/bad-decimal-leading.ttl b/test/normalise/bad-decimal-leading.ttl new file mode 100644 index 00000000..0d18eac7 --- /dev/null +++ b/test/normalise/bad-decimal-leading.ttl @@ -0,0 +1,4 @@ +@base <http://example.org/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +[] <decimal> " junk 1234.5678 "^^xsd:decimal . diff --git a/test/normalise/bad-decimal-trailing.ttl b/test/normalise/bad-decimal-trailing.ttl new file mode 100644 index 00000000..10882ef5 --- /dev/null +++ b/test/normalise/bad-decimal-trailing.ttl @@ -0,0 +1,4 @@ +@base <http://example.org/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +[] <decimal> " 1234.5678 junk "^^xsd:decimal . diff --git a/test/normalise/bad-empty-boolean.ttl b/test/normalise/bad-empty-boolean.ttl new file mode 100644 index 00000000..9a390c46 --- /dev/null +++ b/test/normalise/bad-empty-boolean.ttl @@ -0,0 +1,5 @@ +@base <http://example.org/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +[] <boolean> ""^^xsd:boolean . + diff --git a/test/normalise/bad-integer-leading.ttl b/test/normalise/bad-integer-leading.ttl new file mode 100644 index 00000000..80c1a6af --- /dev/null +++ b/test/normalise/bad-integer-leading.ttl @@ -0,0 +1,4 @@ +@base <http://example.org/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +[] <integer> " junk 987654321 "^^xsd:integer . diff --git a/test/normalise/bad-integer-trailing.ttl b/test/normalise/bad-integer-trailing.ttl new file mode 100644 index 00000000..a94a9ec4 --- /dev/null +++ b/test/normalise/bad-integer-trailing.ttl @@ -0,0 +1,4 @@ +@base <http://example.org/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +[] <integer> " 987654321 junk "^^xsd:integer . diff --git a/test/normalise/manifest.ttl b/test/normalise/manifest.ttl new file mode 100644 index 00000000..a76f589d --- /dev/null +++ b/test/normalise/manifest.ttl @@ -0,0 +1,60 @@ +@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> . 
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix rdft: <http://www.w3.org/ns/rdftest#> . + +<> + rdf:type mf:Manifest ; + rdfs:comment "Serd normalisation test cases" ; + mf:entries ( + <#bad-boolean> + <#bad-decimal-leading> + <#bad-decimal-trailing> + <#bad-empty-boolean> + <#bad-integer-leading> + <#bad-integer-trailing> + <#test-normalise> + ) . + +<#bad-boolean> + rdf:type rdft:TestTurtleNegativeEval ; + mf:name "bad-boolean" ; + rdfs:comment "Invalid xsd:boolean syntax" ; + mf:action <bad-boolean.ttl> . + +<#bad-decimal-leading> + rdf:type rdft:TestTurtleNegativeEval ; + mf:name "bad-decimal-leading" ; + rdfs:comment "Invalid xsd:decimal syntax (leading garbage)" ; + mf:action <bad-decimal-leading.ttl> . + +<#bad-decimal-trailing> + rdf:type rdft:TestTurtleNegativeEval ; + mf:name "bad-decimal-trailing" ; + rdfs:comment "Invalid xsd:decimal syntax (trailing garbage)" ; + mf:action <bad-decimal-trailing.ttl> . + +<#bad-empty-boolean> + rdf:type rdft:TestTurtleNegativeEval ; + mf:name "bad-empty-boolean" ; + rdfs:comment "Invalid xsd:boolean syntax (no value)" ; + mf:action <bad-empty-boolean.ttl> . + +<#bad-integer-leading> + rdf:type rdft:TestTurtleNegativeEval ; + mf:name "bad-integer-leading" ; + rdfs:comment "Invalid xsd:integer syntax (leading garbage)" ; + mf:action <bad-integer-leading.ttl> . + +<#bad-integer-trailing> + rdf:type rdft:TestTurtleNegativeEval ; + mf:name "bad-integer-trailing" ; + rdfs:comment "Invalid xsd:integer syntax (trailing garbage)" ; + mf:action <bad-integer-trailing.ttl> . + +<#test-normalise> + rdf:type rdft:TestTurtleEval ; + mf:name "test-normalise" ; + mf:action <test-normalise.ttl> ; + mf:result <test-normalise.nt> . 
+ diff --git a/test/normalise/test-normalise.nt b/test/normalise/test-normalise.nt new file mode 100644 index 00000000..ed32a059 --- /dev/null +++ b/test/normalise/test-normalise.nt @@ -0,0 +1,69 @@ +_:b1 <http://example.org/boolean> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> . +_:b1 <http://example.org/boolean> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> . +_:b1 <http://example.org/boolean> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> . +_:b1 <http://example.org/boolean> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> . +_:b1 <http://example.org/ieee754> "1.0E2"^^<http://www.w3.org/2001/XMLSchema#float> . +_:b1 <http://example.org/ieee754> "-1.0E2"^^<http://www.w3.org/2001/XMLSchema#float> . +_:b1 <http://example.org/ieee754> "1.0E3"^^<http://www.w3.org/2001/XMLSchema#double> . +_:b1 <http://example.org/ieee754> "-1.0E3"^^<http://www.w3.org/2001/XMLSchema#double> . +_:b1 <http://example.org/machine> "9223372036854775807"^^<http://www.w3.org/2001/XMLSchema#long> . +_:b1 <http://example.org/machine> "-9223372036854775808"^^<http://www.w3.org/2001/XMLSchema#long> . +_:b1 <http://example.org/machine> "2147483647"^^<http://www.w3.org/2001/XMLSchema#int> . +_:b1 <http://example.org/machine> "-2147483648"^^<http://www.w3.org/2001/XMLSchema#int> . +_:b1 <http://example.org/machine> "32767"^^<http://www.w3.org/2001/XMLSchema#short> . +_:b1 <http://example.org/machine> "-32768"^^<http://www.w3.org/2001/XMLSchema#short> . +_:b1 <http://example.org/machine> "127"^^<http://www.w3.org/2001/XMLSchema#byte> . +_:b1 <http://example.org/machine> "-128"^^<http://www.w3.org/2001/XMLSchema#byte> . +_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedLong> . +_:b1 <http://example.org/machine> "18446744073709551615"^^<http://www.w3.org/2001/XMLSchema#unsignedLong> . +_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedInt> . 
+_:b1 <http://example.org/machine> "4294967295"^^<http://www.w3.org/2001/XMLSchema#unsignedInt> . +_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedShort> . +_:b1 <http://example.org/machine> "65535"^^<http://www.w3.org/2001/XMLSchema#unsignedShort> . +_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedByte> . +_:b1 <http://example.org/machine> "255"^^<http://www.w3.org/2001/XMLSchema#unsignedByte> . +_:b1 <http://example.org/decimal> "0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . 
+_:b1 <http://example.org/decimal> "-36893488147419103232.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "0"^^<http://www.w3.org/2001/XMLSchema#nonPositiveInteger> . +_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#nonPositiveInteger> . +_:b1 <http://example.org/integer> "-1"^^<http://www.w3.org/2001/XMLSchema#negativeInteger> . +_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#negativeInteger> . +_:b1 <http://example.org/integer> "0"^^<http://www.w3.org/2001/XMLSchema#nonNegativeInteger> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#nonNegativeInteger> . +_:b1 <http://example.org/integer> "1"^^<http://www.w3.org/2001/XMLSchema#positiveInteger> . 
+_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#positiveInteger> . +_:b1 <http://example.org/langString> "no language tag" . +_:b1 <http://example.org/time> "P1Y6M"^^<http://www.w3.org/2001/XMLSchema#duration> . +_:b1 <http://example.org/time> "12:15:01Z"^^<http://www.w3.org/2001/XMLSchema#time> . +_:b1 <http://example.org/time> "2004-04-12Z"^^<http://www.w3.org/2001/XMLSchema#date> . +_:b1 <http://example.org/binary> "A1B7F080"^^<http://www.w3.org/2001/XMLSchema#hexBinary> . +_:b1 <http://example.org/binary> "Zm9vYmF="^^<http://www.w3.org/2001/XMLSchema#base64Binary> . +_:b1 <http://example.org/other> "untyped" . +_:b1 <http://example.org/other> <http://example.org/uri> . +_:b1 <http://example.org/other> "notxsd"^^<http://example.org/sometype> . +_:b1 <http://example.org/other> "unsupported"^^<http://www.w3.org/2001/XMLSchema#name> . diff --git a/test/normalise/test-normalise.ttl b/test/normalise/test-normalise.ttl new file mode 100644 index 00000000..8cb3f0aa --- /dev/null +++ b/test/normalise/test-normalise.ttl @@ -0,0 +1,75 @@ +@base <http://example.org/> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . 
+ +[ + <boolean> " false "^^xsd:boolean , + " 0 "^^xsd:boolean , + " true "^^xsd:boolean , + " 1 "^^xsd:boolean ; + <ieee754> " +0100.0 "^^xsd:float , + " -0100.0 "^^xsd:float , + " +01000.0 "^^xsd:double , + " -01000.0 "^^xsd:double ; + <machine> " +09223372036854775807 "^^xsd:long , + " -09223372036854775808 "^^xsd:long , + " +02147483647 "^^xsd:int , + " -02147483648 "^^xsd:int , + " +032767 "^^xsd:short , + " -032768 "^^xsd:short , + " +0127 "^^xsd:byte , + " -0128 "^^xsd:byte , + " 01 "^^xsd:unsignedLong , + " 018446744073709551615 "^^xsd:unsignedLong , + " 01 "^^xsd:unsignedInt , + " 04294967295 "^^xsd:unsignedInt , + " 01 "^^xsd:unsignedShort , + " 065535 "^^xsd:unsignedShort , + " 01 "^^xsd:unsignedByte , + " 0255 "^^xsd:unsignedByte ; + <decimal> " 00 "^^xsd:decimal , + " +0 "^^xsd:decimal , + " -0 "^^xsd:decimal , + " 36893488147419103232 "^^xsd:decimal , + " 0036893488147419103232 "^^xsd:decimal , + " +36893488147419103232 "^^xsd:decimal , + " +0036893488147419103232 "^^xsd:decimal , + " +0036893488147419103232. "^^xsd:decimal , + " +0036893488147419103232.00 "^^xsd:decimal , + " +0036893488147419103232.12300 "^^xsd:decimal , + " -36893488147419103232 "^^xsd:decimal , + " -0036893488147419103232 "^^xsd:decimal , + " -0036893488147419103232. 
"^^xsd:decimal , + " -0036893488147419103232.00 "^^xsd:decimal , + " -0036893488147419103232.12300 "^^xsd:decimal , + " 00.12300 "^^xsd:decimal , + " .12300 "^^xsd:decimal , + " +.12300 "^^xsd:decimal , + " +00.12300 "^^xsd:decimal , + " -.12300 "^^xsd:decimal , + " -00.12300 "^^xsd:decimal ; + <integer> " 36893488147419103232 "^^xsd:integer , + " 0036893488147419103232 "^^xsd:integer , + " +36893488147419103232 "^^xsd:integer , + " +0036893488147419103232 "^^xsd:integer , + " -36893488147419103232 "^^xsd:integer , + " -0036893488147419103232 "^^xsd:integer , + " 00 "^^xsd:nonPositiveInteger , + " -036893488147419103232 "^^xsd:nonPositiveInteger , + " -01 "^^xsd:negativeInteger , + " -036893488147419103232 "^^xsd:negativeInteger , + " 00 "^^xsd:nonNegativeInteger , + " 036893488147419103232 "^^xsd:nonNegativeInteger , + " +01 "^^xsd:positiveInteger , + " 036893488147419103232 "^^xsd:positiveInteger ; + <langString> "no language tag"^^rdf:langString ; + <time> " P1Y6M0D "^^xsd:duration , + " 12:15:01+00:00 "^^xsd:time , + " 02004-04-12+00:00 "^^xsd:date ; + <binary> "A 1 B7 F080"^^xsd:hexBinary , + " Zm 9v Y m F="^^xsd:base64Binary ; + <other> "untyped" , + <uri> , + "notxsd"^^<sometype> , + "unsupported"^^xsd:name +] . |