diff options
author | David Robillard <d@drobilla.net> | 2021-05-30 12:23:07 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2022-01-14 19:37:51 -0500 |
commit | 89612ec05f596d135640413e093251fb9691ca14 (patch) | |
tree | 9c3e3d4491e6eb694aa0d429ba52ad80b0a5dc17 | |
parent | cab9b5621b4db600376090f28fdc76c7ac6bd728 (diff) | |
download | serd-89612ec05f596d135640413e093251fb9691ca14.tar.gz serd-89612ec05f596d135640413e093251fb9691ca14.tar.bz2 serd-89612ec05f596d135640413e093251fb9691ca14.zip |
Add support for converting literals to canonical form
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | doc/serdi.1 | 14 | ||||
-rw-r--r-- | include/serd/serd.h | 27 | ||||
-rw-r--r-- | meson.build | 1 | ||||
-rw-r--r-- | src/canon.c | 192 | ||||
-rw-r--r-- | src/serdi.c | 20 | ||||
-rw-r--r-- | src/string.c | 2 | ||||
-rw-r--r-- | src/string_utils.h | 8 | ||||
-rw-r--r-- | test/canon/bad-boolean.ttl | 5 | ||||
-rw-r--r-- | test/canon/bad-decimal-leading.ttl | 4 | ||||
-rw-r--r-- | test/canon/bad-decimal-trailing.ttl | 4 | ||||
-rw-r--r-- | test/canon/bad-empty-boolean.ttl | 5 | ||||
-rw-r--r-- | test/canon/bad-integer-leading.ttl | 4 | ||||
-rw-r--r-- | test/canon/bad-integer-trailing.ttl | 4 | ||||
-rw-r--r-- | test/canon/manifest.ttl | 58 | ||||
-rw-r--r-- | test/canon/test-canon.nt | 70 | ||||
-rw-r--r-- | test/canon/test-canon.ttl | 76 | ||||
-rw-r--r-- | test/meson.build | 8 | ||||
-rw-r--r-- | test/test_string.c | 2 |
19 files changed, 495 insertions, 10 deletions
@@ -5,6 +5,7 @@ serd (1.0.1) unstable; * Add extensible logging API * Add model for storing statements in memory * Add option for writing terse output without newlines + * Add support for converting literals to canonical form * Add support for parsing variables * Add support for writing terse collections * Add support for xsd:float and xsd:double literals diff --git a/doc/serdi.1 b/doc/serdi.1 index e9d70857..59eb67ce 100644 --- a/doc/serdi.1 +++ b/doc/serdi.1 @@ -6,7 +6,7 @@ .Nd read, transform, and write RDF data .Sh SYNOPSIS .Nm serdi -.Op Fl abefhlmqtvx +.Op Fl Cabefhlmqtvx .Op Fl I Ar base .Op Fl c Ar prefix .Op Fl i Ar syntax @@ -35,6 +35,18 @@ or transform URIs and blank node IDs. The options are as follows: .Pp .Bl -tag -compact -width 3n +.It Fl C +Convert literals to canonical form. +Literals with supported XSD datatypes will be parsed and rewritten canonically. +All numeric datatypes are supported, as well as +.Vt boolean , +.Vt duration , +.Vt datetime , +.Vt time , +.Vt hexBinary , +and +.Vt base64Binary . +.Pp .It Fl I Ar base Input base URI. Relative URI references in the input will be resolved against this. diff --git a/include/serd/serd.h b/include/serd/serd.h index efa48543..fad9d070 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -217,6 +217,7 @@ typedef enum { SERD_ERR_BAD_CALL, ///< Invalid call SERD_ERR_BAD_URI, ///< Invalid or unresolved URI SERD_ERR_BAD_INDEX, ///< No optimal model index available + SERD_ERR_INVALID, ///< Invalid data } SerdStatus; /** @@ -1939,6 +1940,32 @@ serd_sink_write_end(const SerdSink* SERD_NONNULL sink, /** @} + @defgroup serd_canon Canon + @{ +*/ + +/// Flags that control canonical node transformation +typedef enum { + SERD_CANON_LAX = 1u << 0u, ///< Tolerate and pass through invalid input +} SerdCanonFlag; + +/// Bitwise OR of SerdCanonFlag values +typedef uint32_t SerdCanonFlags; + +/** + Return a new sink that transforms literals to canonical form where possible. + + The returned sink acts like `target` in all respects, except literal nodes + in statements may be modified from the original. +*/ +SERD_API +SerdSink* SERD_ALLOCATED +serd_canon_new(const SerdWorld* SERD_NULLABLE world, + const SerdSink* SERD_NONNULL target, + SerdCanonFlags flags); + +/** + @} @defgroup serd_env Environment @{ */ diff --git a/meson.build b/meson.build index 5197f448..25cce6d0 100644 --- a/meson.build +++ b/meson.build @@ -86,6 +86,7 @@ c_header = files('include/serd/serd.h') sources = [ 'src/byte_sink.c', 'src/byte_source.c', + 'src/canon.c', 'src/caret.c', 'src/compare.c', 'src/cursor.c', diff --git a/src/canon.c b/src/canon.c new file mode 100644 index 00000000..84d20d0c --- /dev/null +++ b/src/canon.c @@ -0,0 +1,192 @@ +/* + Copyright 2019-2022 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "caret.h" +#include "namespaces.h" +#include "node.h" +#include "statement.h" +#include "string_utils.h" + +#include "exess/exess.h" +#include "serd/serd.h" + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +typedef struct { + const SerdWorld* world; + const SerdSink* target; + SerdCanonFlags flags; +} SerdCanonData; + +static ExessResult +build_typed(SerdNode** const out, + const SerdNode* const SERD_NONNULL node, + const SerdNode* const SERD_NONNULL datatype) +{ + *out = NULL; + + const char* str = serd_node_string(node); + const char* datatype_uri = serd_node_string(datatype); + ExessResult r = {EXESS_SUCCESS, 0}; + + if (!strcmp(datatype_uri, NS_RDF "langString")) { + *out = serd_new_string(serd_node_string_view(node)); + return r; + } + + const ExessDatatype value_type = exess_datatype_from_uri(datatype_uri); + if (value_type == EXESS_NOTHING) { + return r; + } + + // Measure canonical form to know how much space to allocate for node + if ((r = exess_write_canonical(str, value_type, 0, NULL)).status) { + return r; + } + + // Allocate node + const size_t datatype_uri_len = serd_node_length(datatype); + const size_t datatype_size = serd_node_total_size(datatype); + const size_t len = serd_node_pad_length(r.count); + const size_t total_len = sizeof(SerdNode) + len + datatype_size; + SerdNode* const result = serd_node_malloc(total_len); + + result->length = r.count; + result->flags = SERD_HAS_DATATYPE; + result->type = SERD_LITERAL; + + // Write canonical form directly into node + exess_write_canonical(str, value_type, r.count + 1, serd_node_buffer(result)); + + SerdNode* const datatype_node = result + 1 + (len / sizeof(SerdNode)); + char* const datatype_buf = serd_node_buffer(datatype_node); + + datatype_node->length = datatype_uri_len; + datatype_node->type = SERD_URI; + memcpy(datatype_buf, datatype_uri, datatype_uri_len + 1); + + *out = result; + return r; +} + +static ExessResult +build_tagged(SerdNode** const out, + const SerdNode* const SERD_NONNULL node, + const SerdNode* const SERD_NONNULL language) +{ +#define MAX_LANG_LEN 48 // RFC5646 requires 35, RFC4646 recommends 42 + + const size_t node_len = serd_node_length(node); + const char* const lang = serd_node_string(language); + const size_t lang_len = serd_node_length(language); + if (lang_len > MAX_LANG_LEN) { + const ExessResult r = {EXESS_NO_SPACE, node_len}; + return r; + } + + // Convert language tag to lower-case + char canonical_lang[MAX_LANG_LEN] = {0}; + for (size_t i = 0u; i < lang_len; ++i) { + canonical_lang[i] = serd_to_lower(lang[i]); + } + + // Make a new literal that is otherwise identical + *out = serd_new_literal(serd_node_string_view(node), + serd_node_flags(node), + SERD_SUBSTRING(canonical_lang, lang_len)); + + const ExessResult r = {EXESS_SUCCESS, node_len}; + return r; + +#undef MAX_LANG_LEN +} + +static SerdStatus +serd_canon_on_statement(SerdCanonData* const data, + const SerdStatementFlags flags, + const SerdStatement* const statement) +{ + const SerdNode* const object = serd_statement_object(statement); + const SerdNode* const datatype = serd_node_datatype(object); + const SerdNode* const language = serd_node_language(object); + if (!datatype && !language) { + return serd_sink_write_statement(data->target, flags, statement); + } + + SerdNode* normo = NULL; + const ExessResult r = datatype ? build_typed(&normo, object, datatype) + : build_tagged(&normo, object, language); + + if (r.status) { + SerdCaret caret = {NULL, 0u, 0u}; + const bool lax = (data->flags & SERD_CANON_LAX); + + if (statement->caret) { + // Adjust column to point at the error within the literal + caret.file = statement->caret->file; + caret.line = statement->caret->line; + caret.col = statement->caret->col + 1 + (unsigned)r.count; + } + + serd_logf_at(data->world, + lax ? SERD_LOG_LEVEL_WARNING : SERD_LOG_LEVEL_ERROR, + statement->caret ? &caret : NULL, + "invalid literal (%s)", + exess_strerror(r.status)); + + if (!lax) { + return SERD_ERR_INVALID; + } + } + + if (!normo) { + return serd_sink_write_statement(data->target, flags, statement); + } + + const SerdStatus st = serd_sink_write(data->target, + flags, + statement->nodes[0], + statement->nodes[1], + normo, + statement->nodes[3]); + serd_node_free(normo); + return st; +} + +static SerdStatus +serd_canon_on_event(SerdCanonData* const data, const SerdEvent* const event) +{ + return (event->type == SERD_STATEMENT) + ? serd_canon_on_statement( + data, event->statement.flags, event->statement.statement) + : serd_sink_write_event(data->target, event); +} + +SerdSink* +serd_canon_new(const SerdWorld* const world, + const SerdSink* const target, + const SerdCanonFlags flags) +{ + SerdCanonData* const data = (SerdCanonData*)calloc(1, sizeof(SerdCanonData)); + + data->world = world; + data->target = target; + data->flags = flags; + + return serd_sink_new(data, (SerdEventFunc)serd_canon_on_event, free); +} diff --git a/src/serdi.c b/src/serdi.c index 97601b83..243be98e 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -56,6 +56,7 @@ print_usage(const char* const name, const bool error) fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); fprintf(os, "Read and write RDF syntax.\n"); fprintf(os, "Use - for INPUT to read from standard input.\n\n"); + fprintf(os, " -C Convert literals to canonical form.\n"); fprintf(os, " -I BASE_URI Input base URI.\n"); fprintf(os, " -a Write ASCII output if possible.\n"); fprintf(os, " -b Fast bulk output for large serialisations.\n"); @@ -153,6 +154,7 @@ main(int argc, char** argv) bool no_inline = false; bool osyntax_set = false; bool use_model = false; + bool canonical = false; bool quiet = false; size_t stack_size = 4194304; const char* input_string = NULL; @@ -169,7 +171,9 @@ main(int argc, char** argv) for (int o = 1; argv[a][o]; ++o) { const char opt = argv[a][o]; - if (opt == 'a') { + if (opt == 'C') { + canonical = true; + } else if (opt == 'a') { writer_flags |= SERD_WRITE_ASCII; } else if (opt == 'b') { bulk_write = true; @@ -337,7 +341,7 @@ main(int argc, char** argv) SerdModel* model = NULL; SerdSink* inserter = NULL; - const SerdSink* sink = NULL; + const SerdSink* out_sink = NULL; if (use_model) { const SerdModelFlags flags = (input_has_graphs ? SERD_STORE_GRAPHS : 0u); @@ -354,9 +358,16 @@ main(int argc, char** argv) } inserter = serd_inserter_new(model, NULL); - sink = inserter; + out_sink = inserter; } else { - sink = serd_writer_sink(writer); + out_sink = serd_writer_sink(writer); + } + + const SerdSink* sink = out_sink; + + SerdSink* canon = NULL; + if (canonical) { + sink = canon = serd_canon_new(world, out_sink, reader_flags); } if (quiet) { @@ -455,6 +466,7 @@ main(int argc, char** argv) serd_cursor_free(everything); } + serd_sink_free(canon); serd_sink_free(inserter); serd_model_free(model); serd_writer_free(writer); diff --git a/src/string.c b/src/string.c index 11b53050..6942b7b6 100644 --- a/src/string.c +++ b/src/string.c @@ -62,6 +62,8 @@ serd_strerror(const SerdStatus status) return "Invalid or unresolved URI"; case SERD_ERR_BAD_INDEX: return "No optimal model index available"; + case SERD_ERR_INVALID: + return "Invalid data"; } return "Unknown error"; diff --git a/src/string_utils.h b/src/string_utils.h index 54f7877c..5cf7ba8c 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -60,7 +60,7 @@ is_xdigit(const int c) } static inline bool -is_space(const char c) +is_space(const int c) { switch (c) { case ' ': @@ -89,16 +89,16 @@ is_windows_path(const char* path) } static inline char -serd_to_upper(const char c) +serd_to_lower(const char c) { - return (char)((c >= 'a' && c <= 'z') ? c - 32 : c); + return (char)((c >= 'A' && c <= 'Z') ? c + 32 : c); } static inline int serd_strncasecmp(const char* s1, const char* s2, size_t n) { for (; n > 0 && *s2; s1++, s2++, --n) { - if (serd_to_upper(*s1) != serd_to_upper(*s2)) { + if (serd_to_lower(*s1) != serd_to_lower(*s2)) { return ((*(const uint8_t*)s1 < *(const uint8_t*)s2) ? -1 : +1); } } diff --git a/test/canon/bad-boolean.ttl b/test/canon/bad-boolean.ttl new file mode 100644 index 00000000..c4fc3eb5 --- /dev/null +++ b/test/canon/bad-boolean.ttl @@ -0,0 +1,5 @@ +@base <http://example.org/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +[] <boolean> " ja "^^xsd:boolean . + diff --git a/test/canon/bad-decimal-leading.ttl b/test/canon/bad-decimal-leading.ttl new file mode 100644 index 00000000..0d18eac7 --- /dev/null +++ b/test/canon/bad-decimal-leading.ttl @@ -0,0 +1,4 @@ +@base <http://example.org/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +[] <decimal> " junk 1234.5678 "^^xsd:decimal . diff --git a/test/canon/bad-decimal-trailing.ttl b/test/canon/bad-decimal-trailing.ttl new file mode 100644 index 00000000..10882ef5 --- /dev/null +++ b/test/canon/bad-decimal-trailing.ttl @@ -0,0 +1,4 @@ +@base <http://example.org/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +[] <decimal> " 1234.5678 junk "^^xsd:decimal . diff --git a/test/canon/bad-empty-boolean.ttl b/test/canon/bad-empty-boolean.ttl new file mode 100644 index 00000000..9a390c46 --- /dev/null +++ b/test/canon/bad-empty-boolean.ttl @@ -0,0 +1,5 @@ +@base <http://example.org/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +[] <boolean> ""^^xsd:boolean . + diff --git a/test/canon/bad-integer-leading.ttl b/test/canon/bad-integer-leading.ttl new file mode 100644 index 00000000..80c1a6af --- /dev/null +++ b/test/canon/bad-integer-leading.ttl @@ -0,0 +1,4 @@ +@base <http://example.org/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +[] <integer> " junk 987654321 "^^xsd:integer . diff --git a/test/canon/bad-integer-trailing.ttl b/test/canon/bad-integer-trailing.ttl new file mode 100644 index 00000000..a94a9ec4 --- /dev/null +++ b/test/canon/bad-integer-trailing.ttl @@ -0,0 +1,4 @@ +@base <http://example.org/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +[] <integer> " 987654321 junk "^^xsd:integer . diff --git a/test/canon/manifest.ttl b/test/canon/manifest.ttl new file mode 100644 index 00000000..e2195212 --- /dev/null +++ b/test/canon/manifest.ttl @@ -0,0 +1,58 @@ +@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix rdft: <http://www.w3.org/ns/rdftest#> . + +<> + a mf:Manifest ; + rdfs:comment "Serd canonical literal test cases" ; + mf:entries ( + <#bad-boolean> + <#bad-decimal-leading> + <#bad-decimal-trailing> + <#bad-empty-boolean> + <#bad-integer-leading> + <#bad-integer-trailing> + <#test-canon> + ) . + +<#bad-boolean> + a rdft:TestTurtleNegativeEval ; + mf:name "bad-boolean" ; + rdfs:comment "Invalid xsd::boolean syntax" ; + mf:action <bad-boolean.ttl> . + +<#bad-decimal-leading> + a rdft:TestTurtleNegativeEval ; + mf:name "bad-decimal-leading" ; + rdfs:comment "Invalid xsd::decimal syntax (leading garbage)" ; + mf:action <bad-decimal-leading.ttl> . + +<#bad-decimal-trailing> + a rdft:TestTurtleNegativeEval ; + mf:name "bad-decimal-trailing" ; + rdfs:comment "Invalid xsd::decimal syntax (trailing garbage)" ; + mf:action <bad-decimal-trailing.ttl> . + +<#bad-empty-boolean> + a rdft:TestTurtleNegativeEval ; + mf:name "bad-empty-boolean" ; + rdfs:comment "Invalid xsd::boolean syntax (no value)" ; + mf:action <bad-empty-boolean.ttl> . + +<#bad-integer-leading> + a rdft:TestTurtleNegativeEval ; + mf:name "bad-integer-leading" ; + rdfs:comment "Invalid xsd::integer syntax (leading garbage)" ; + mf:action <bad-integer-leading.ttl> . + +<#bad-integer-trailing> + a rdft:TestTurtleNegativeEval ; + mf:name "bad-integer-trailing" ; + rdfs:comment "Invalid xsd::integer syntax (trailing garbage)" ; + mf:action <bad-integer-trailing.ttl> . + +<#test-canon> + a rdft:TestTurtleEval ; + mf:name "test-canon" ; + mf:action <test-canon.ttl> ; + mf:result <test-canon.nt> . diff --git a/test/canon/test-canon.nt b/test/canon/test-canon.nt new file mode 100644 index 00000000..ff492890 --- /dev/null +++ b/test/canon/test-canon.nt @@ -0,0 +1,70 @@ +_:b1 <http://example.org/boolean> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> . +_:b1 <http://example.org/boolean> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> . +_:b1 <http://example.org/boolean> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> . +_:b1 <http://example.org/boolean> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> . +_:b1 <http://example.org/ieee754> "1.0E2"^^<http://www.w3.org/2001/XMLSchema#float> . +_:b1 <http://example.org/ieee754> "-1.0E2"^^<http://www.w3.org/2001/XMLSchema#float> . +_:b1 <http://example.org/ieee754> "1.0E3"^^<http://www.w3.org/2001/XMLSchema#double> . +_:b1 <http://example.org/ieee754> "-1.0E3"^^<http://www.w3.org/2001/XMLSchema#double> . +_:b1 <http://example.org/machine> "9223372036854775807"^^<http://www.w3.org/2001/XMLSchema#long> . +_:b1 <http://example.org/machine> "-9223372036854775808"^^<http://www.w3.org/2001/XMLSchema#long> . +_:b1 <http://example.org/machine> "2147483647"^^<http://www.w3.org/2001/XMLSchema#int> . +_:b1 <http://example.org/machine> "-2147483648"^^<http://www.w3.org/2001/XMLSchema#int> . +_:b1 <http://example.org/machine> "32767"^^<http://www.w3.org/2001/XMLSchema#short> . +_:b1 <http://example.org/machine> "-32768"^^<http://www.w3.org/2001/XMLSchema#short> . +_:b1 <http://example.org/machine> "127"^^<http://www.w3.org/2001/XMLSchema#byte> . +_:b1 <http://example.org/machine> "-128"^^<http://www.w3.org/2001/XMLSchema#byte> . +_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedLong> . +_:b1 <http://example.org/machine> "18446744073709551615"^^<http://www.w3.org/2001/XMLSchema#unsignedLong> . +_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedInt> . +_:b1 <http://example.org/machine> "4294967295"^^<http://www.w3.org/2001/XMLSchema#unsignedInt> . +_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedShort> . +_:b1 <http://example.org/machine> "65535"^^<http://www.w3.org/2001/XMLSchema#unsignedShort> . +_:b1 <http://example.org/machine> "1"^^<http://www.w3.org/2001/XMLSchema#unsignedByte> . +_:b1 <http://example.org/machine> "255"^^<http://www.w3.org/2001/XMLSchema#unsignedByte> . +_:b1 <http://example.org/decimal> "0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-0.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "36893488147419103232.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-36893488147419103232.0"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-36893488147419103232.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/decimal> "-0.123"^^<http://www.w3.org/2001/XMLSchema#decimal> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#integer> . +_:b1 <http://example.org/integer> "0"^^<http://www.w3.org/2001/XMLSchema#nonPositiveInteger> . +_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#nonPositiveInteger> . +_:b1 <http://example.org/integer> "-1"^^<http://www.w3.org/2001/XMLSchema#negativeInteger> . +_:b1 <http://example.org/integer> "-36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#negativeInteger> . +_:b1 <http://example.org/integer> "0"^^<http://www.w3.org/2001/XMLSchema#nonNegativeInteger> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#nonNegativeInteger> . +_:b1 <http://example.org/integer> "1"^^<http://www.w3.org/2001/XMLSchema#positiveInteger> . +_:b1 <http://example.org/integer> "36893488147419103232"^^<http://www.w3.org/2001/XMLSchema#positiveInteger> . +_:b1 <http://example.org/langString> "no language tag" . +_:b1 <http://example.org/taggedString> "english"@en-ca . +_:b1 <http://example.org/time> "P1Y6M"^^<http://www.w3.org/2001/XMLSchema#duration> . +_:b1 <http://example.org/time> "12:15:01Z"^^<http://www.w3.org/2001/XMLSchema#time> . +_:b1 <http://example.org/time> "2004-04-12Z"^^<http://www.w3.org/2001/XMLSchema#date> . +_:b1 <http://example.org/binary> "A1B7F080"^^<http://www.w3.org/2001/XMLSchema#hexBinary> . +_:b1 <http://example.org/binary> "Zm9vYmF="^^<http://www.w3.org/2001/XMLSchema#base64Binary> . +_:b1 <http://example.org/other> "untyped" . +_:b1 <http://example.org/other> <http://example.org/uri> . +_:b1 <http://example.org/other> "notxsd"^^<http://example.org/sometype> . +_:b1 <http://example.org/other> "unsupported"^^<http://www.w3.org/2001/XMLSchema#name> . diff --git a/test/canon/test-canon.ttl b/test/canon/test-canon.ttl new file mode 100644 index 00000000..0d0b4682 --- /dev/null +++ b/test/canon/test-canon.ttl @@ -0,0 +1,76 @@ +@base <http://example.org/> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +[ + <boolean> " false "^^xsd:boolean , + " 0 "^^xsd:boolean , + " true "^^xsd:boolean , + " 1 "^^xsd:boolean ; + <ieee754> " +0100.0 "^^xsd:float , + " -0100.0 "^^xsd:float , + " +01000.0 "^^xsd:double , + " -01000.0 "^^xsd:double ; + <machine> " +09223372036854775807 "^^xsd:long , + " -09223372036854775808 "^^xsd:long , + " +02147483647 "^^xsd:int , + " -02147483648 "^^xsd:int , + " +032767 "^^xsd:short , + " -032768 "^^xsd:short , + " +0127 "^^xsd:byte , + " -0128 "^^xsd:byte , + " 01 "^^xsd:unsignedLong , + " 018446744073709551615 "^^xsd:unsignedLong , + " 01 "^^xsd:unsignedInt , + " 04294967295 "^^xsd:unsignedInt , + " 01 "^^xsd:unsignedShort , + " 065535 "^^xsd:unsignedShort , + " 01 "^^xsd:unsignedByte , + " 0255 "^^xsd:unsignedByte ; + <decimal> " 00 "^^xsd:decimal , + " +0 "^^xsd:decimal , + " -0 "^^xsd:decimal , + " 36893488147419103232 "^^xsd:decimal , + " 0036893488147419103232 "^^xsd:decimal , + " +36893488147419103232 "^^xsd:decimal , + " +0036893488147419103232 "^^xsd:decimal , + " +0036893488147419103232. "^^xsd:decimal , + " +0036893488147419103232.00 "^^xsd:decimal , + " +0036893488147419103232.12300 "^^xsd:decimal , + " -36893488147419103232 "^^xsd:decimal , + " -0036893488147419103232 "^^xsd:decimal , + " -0036893488147419103232. "^^xsd:decimal , + " -0036893488147419103232.00 "^^xsd:decimal , + " -0036893488147419103232.12300 "^^xsd:decimal , + " 00.12300 "^^xsd:decimal , + " .12300 "^^xsd:decimal , + " +.12300 "^^xsd:decimal , + " +00.12300 "^^xsd:decimal , + " -.12300 "^^xsd:decimal , + " -00.12300 "^^xsd:decimal ; + <integer> " 36893488147419103232 "^^xsd:integer , + " 0036893488147419103232 "^^xsd:integer , + " +36893488147419103232 "^^xsd:integer , + " +0036893488147419103232 "^^xsd:integer , + " -36893488147419103232 "^^xsd:integer , + " -0036893488147419103232 "^^xsd:integer , + " 00 "^^xsd:nonPositiveInteger , + " -036893488147419103232 "^^xsd:nonPositiveInteger , + " -01 "^^xsd:negativeInteger , + " -036893488147419103232 "^^xsd:negativeInteger , + " 00 "^^xsd:nonNegativeInteger , + " 036893488147419103232 "^^xsd:nonNegativeInteger , + " +01 "^^xsd:positiveInteger , + " 036893488147419103232 "^^xsd:positiveInteger ; + <langString> "no language tag"^^rdf:langString ; + <taggedString> "english"@EN-CA ; + <time> " P1Y6M0D "^^xsd:duration , + " 12:15:01+00:00 "^^xsd:time , + " 02004-04-12+00:00 "^^xsd:date ; + <binary> "A 1 B7 F080"^^xsd:hexBinary , + " Zm 9v Y m F="^^xsd:base64Binary ; + <other> "untyped" , + <uri> , + "notxsd"^^<sometype> , + "unsupported"^^xsd:name +] . diff --git a/test/meson.build b/test/meson.build index c4dc4b3b..e8392559 100644 --- a/test/meson.build +++ b/test/meson.build @@ -249,6 +249,14 @@ if get_option('utils') suite: ['rdf', 'serd'], timeout: 240) + manifest = files('canon' / 'manifest.ttl') + base_uri = serd_base + 'canon' + '/' + test('canon', run_test_suite, + args: script_args + [manifest, base_uri, '--', '-C'], + env: test_env, + suite: ['rdf', 'serd'], + timeout: 240) + ### The lax suite is special because it is run twice... lax_manifest = files('lax/manifest.ttl') lax_base_uri = serd_base + name + '/' diff --git a/test/test_string.c b/test/test_string.c index 2bd47680..f8534001 100644 --- a/test/test_string.c +++ b/test/test_string.c @@ -32,7 +32,7 @@ test_strerror(void) { const char* msg = serd_strerror(SERD_SUCCESS); assert(!strcmp(msg, "Success")); - for (int i = SERD_FAILURE; i <= SERD_ERR_BAD_INDEX; ++i) { + for (int i = SERD_FAILURE; i <= SERD_ERR_INVALID; ++i) { msg = serd_strerror((SerdStatus)i); assert(strcmp(msg, "Success")); } |