diff options
author | David Robillard <d@drobilla.net> | 2021-07-22 15:26:22 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:08 -0500 |
commit | 5e4538756d601e6a941c5290777af95ea8848e1a (patch) | |
tree | 9868e188a48a528e9908fcf695147f75790c3a56 | |
parent | 64024d0fa6a6dc048b2b846738846da597025f56 (diff) | |
download | serd-5e4538756d601e6a941c5290777af95ea8848e1a.tar.gz serd-5e4538756d601e6a941c5290777af95ea8848e1a.tar.bz2 serd-5e4538756d601e6a941c5290777af95ea8848e1a.zip |
[WIP] Preserve long or short quoting from input documents
34 files changed, 239 insertions, 290 deletions
@@ -9,6 +9,7 @@ serd (1.1.1) unstable; urgency=medium * Bring read/write interface closer to C standard * Make nodes opaque * Preserve anonymous graph syntax in TriG + * Preserve long or short quoting from input documents * Remove SERD_DISABLE_DEPRECATED and SERD_DEPRECATED_BY * Remove serd_uri_to_path() * Remove support for reading Turtle named inline nodes extension diff --git a/include/serd/node.h b/include/serd/node.h index 1faba160..90ecde71 100644 --- a/include/serd/node.h +++ b/include/serd/node.h @@ -104,10 +104,9 @@ typedef enum { /// Node flags, which ORed together make a #SerdNodeFlags typedef enum { - SERD_HAS_NEWLINE = 1U << 0U, ///< Contains line breaks ('\\n' or '\\r') - SERD_HAS_QUOTE = 1U << 1U, ///< Contains quotes ('"') - SERD_HAS_DATATYPE = 1U << 2U, ///< Literal node has datatype - SERD_HAS_LANGUAGE = 1U << 3U, ///< Literal node has language + SERD_IS_LONG = 1U << 0U, ///< Literal node should be triple-quoted + SERD_HAS_DATATYPE = 1U << 1U, ///< Literal node has datatype + SERD_HAS_LANGUAGE = 1U << 2U, ///< Literal node has language } SerdNodeFlag; /// Bitwise OR of #SerdNodeFlag values @@ -135,23 +134,30 @@ SERD_API SerdNode* ZIX_ALLOCATED serd_new_string(SerdStringView string); /** - Create a new plain literal node from `str` with `lang`. + Create a new literal node with optional datatype or language. - A plain literal has no datatype, but may have a language tag. The `lang` - may be empty, in which case this is equivalent to `serd_new_string()`. -*/ -SERD_API SerdNode* ZIX_ALLOCATED -serd_new_plain_literal(SerdStringView str, SerdStringView lang); + This can create more complex literals than serd_new_string() with an + associated datatype URI or language tag, as well as control whether a + literal should be written as a short or long (triple-quoted) string. -/** - Create a new typed literal node from `str`. + @param string The string value of the literal. + + @param flags Flags to describe the literal and its metadata. This must be a + valid combination of flags, in particular, at most one of #SERD_HAS_DATATYPE + and #SERD_HAS_LANGUAGE may be set. + + @param meta The string value of the literal's metadata. If + #SERD_HAS_DATATYPE is set, then this must be an absolute datatype URI. If + #SERD_HAS_LANGUAGE is set, then this must be a language tag like "en-ca". + Otherwise, it is ignored. - A typed literal has no language tag, but may have a datatype. The - `datatype` may be NULL, in which case this is equivalent to - `serd_new_string()`. + @return A newly allocated literal node that must be freed with + serd_node_free(), or null if the arguments are invalid or allocation failed. */ SERD_API SerdNode* ZIX_ALLOCATED -serd_new_typed_literal(SerdStringView str, SerdStringView datatype_uri); +serd_new_literal(SerdStringView string, + SerdNodeFlags flags, + SerdStringView meta); /** Create a new node from a blank node label. diff --git a/include/serd/string.h b/include/serd/string.h index 46da939d..11e55e6e 100644 --- a/include/serd/string.h +++ b/include/serd/string.h @@ -5,7 +5,6 @@ #define SERD_STRING_H #include "serd/attributes.h" -#include "serd/node.h" #include "zix/attributes.h" #include <stddef.h> @@ -19,16 +18,6 @@ SERD_BEGIN_DECLS */ /** - Measure a UTF-8 string. - - @return Length of `str` in bytes. - @param str A null-terminated UTF-8 string. - @param flags (Output) Set to the applicable flags. -*/ -SERD_API size_t -serd_strlen(const char* ZIX_NONNULL str, SerdNodeFlags* ZIX_NULLABLE flags); - -/** Decode a base64 string. This function can be used to decode a node created with serd_new_base64(). @@ -11,7 +11,6 @@ #include "serd/buffer.h" #include "serd/node.h" #include "serd/status.h" -#include "serd/string.h" #include "serd/string_view.h" #include "serd/uri.h" #include "serd/write_result.h" @@ -195,88 +194,92 @@ serd_new_token(const SerdNodeType type, const SerdStringView str) SerdNode* serd_new_string(const SerdStringView str) { - SerdNodeFlags flags = 0; - const size_t length = serd_substrlen(str.data, str.length, &flags); - SerdNode* node = serd_node_malloc(length, flags, SERD_LITERAL); - - memcpy(serd_node_buffer(node), str.data, str.length); - node->length = length; - - serd_node_check_padding(node); - return node; -} - -/// Internal pre-measured implementation of serd_new_plain_literal -static SerdNode* -serd_new_plain_literal_i(const SerdStringView str, - SerdNodeFlags flags, - const SerdStringView lang) -{ - assert(str.length); - assert(lang.length); - - flags |= SERD_HAS_LANGUAGE; - - const size_t len = serd_node_pad_length(str.length); - const size_t total_len = len + sizeof(SerdNode) + lang.length; + SerdNodeFlags flags = 0U; + SerdNode* node = serd_node_malloc(str.length, flags, SERD_LITERAL); - SerdNode* node = serd_node_malloc(total_len, flags, SERD_LITERAL); - memcpy(serd_node_buffer(node), str.data, str.length); - node->length = str.length; + if (node) { + if (str.data && str.length) { + memcpy(serd_node_buffer(node), str.data, str.length); + } - SerdNode* lang_node = node + 1 + (len / sizeof(SerdNode)); - lang_node->type = SERD_LITERAL; - lang_node->length = lang.length; - memcpy(serd_node_buffer(lang_node), lang.data, lang.length); - serd_node_check_padding(lang_node); + node->length = str.length; + serd_node_check_padding(node); + } - serd_node_check_padding(node); return node; } -SerdNode* -serd_new_plain_literal(const SerdStringView str, const SerdStringView lang) +ZIX_PURE_FUNC static bool +is_langtag(const SerdStringView string) { - if (!lang.length) { - return serd_new_string(str); + // First character must be a letter + size_t i = 0; + if (!string.length || !is_alpha(string.data[i])) { + return false; } - SerdNodeFlags flags = 0; - serd_strlen(str.data, &flags); + // First component must be all letters + while (++i < string.length && string.data[i] && string.data[i] != '-') { + if (!is_alpha(string.data[i])) { + return false; + } + } - return serd_new_plain_literal_i(str, flags, lang); + // Following components can have letters and digits + while (i < string.length && string.data[i] == '-') { + while (++i < string.length && string.data[i] && string.data[i] != '-') { + const char c = string.data[i]; + if (!is_alpha(c) && !is_digit(c)) { + return false; + } + } + } + + return true; } SerdNode* -serd_new_typed_literal(const SerdStringView str, - const SerdStringView datatype_uri) +serd_new_literal(const SerdStringView string, + const SerdNodeFlags flags, + const SerdStringView meta) { - if (!datatype_uri.length) { - return serd_new_string(str); + if (!(flags & (SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE))) { + SerdNode* node = serd_node_malloc(string.length, flags, SERD_LITERAL); + + memcpy(serd_node_buffer(node), string.data, string.length); + node->length = string.length; + serd_node_check_padding(node); + return node; } - if (!strcmp(datatype_uri.data, NS_RDF "langString")) { + if ((flags & SERD_HAS_DATATYPE) && (flags & SERD_HAS_LANGUAGE)) { return NULL; } - SerdNodeFlags flags = 0U; - serd_strlen(str.data, &flags); + if (!meta.length) { + return NULL; + } - flags |= SERD_HAS_DATATYPE; + if (((flags & SERD_HAS_DATATYPE) && + (!serd_uri_string_has_scheme(meta.data) || + !strcmp(meta.data, NS_RDF "langString"))) || + ((flags & SERD_HAS_LANGUAGE) && !is_langtag(meta))) { + return NULL; + } - const size_t len = serd_node_pad_length(str.length); - const size_t total_len = len + sizeof(SerdNode) + datatype_uri.length; + const size_t len = serd_node_pad_length(string.length); + const size_t meta_len = serd_node_pad_length(meta.length); + const size_t meta_size = sizeof(SerdNode) + meta_len; - SerdNode* node = serd_node_malloc(total_len, flags, SERD_LITERAL); - memcpy(serd_node_buffer(node), str.data, str.length); - node->length = str.length; + SerdNode* node = serd_node_malloc(len + meta_size, flags, SERD_LITERAL); + memcpy(serd_node_buffer(node), string.data, string.length); + node->length = string.length; - SerdNode* datatype_node = node + 1 + (len / sizeof(SerdNode)); - datatype_node->length = datatype_uri.length; - datatype_node->type = SERD_URI; - memcpy( - serd_node_buffer(datatype_node), datatype_uri.data, datatype_uri.length); - serd_node_check_padding(datatype_node); + SerdNode* meta_node = node + 1U + (len / sizeof(SerdNode)); + meta_node->length = meta.length; + meta_node->type = (flags & SERD_HAS_DATATYPE) ? SERD_URI : SERD_LITERAL; + memcpy(serd_node_buffer(meta_node), meta.data, meta.length); + serd_node_check_padding(meta_node); serd_node_check_padding(node); return node; @@ -548,13 +551,6 @@ typedef size_t (*SerdWriteLiteralFunc)(const void* user_data, size_t buf_size, char* buf); -SerdNode* -serd_new_boolean(bool b) -{ - return serd_new_typed_literal(b ? serd_string("true") : serd_string("false"), - serd_node_string_view(&serd_xsd_boolean.node)); -} - static SerdNode* serd_new_custom_literal(const void* const user_data, const size_t len, @@ -589,8 +585,9 @@ serd_new_double(const double d) const ExessResult r = exess_write_double(d, sizeof(buf), buf); return r.status ? NULL - : serd_new_typed_literal(serd_substring(buf, r.count), - serd_string(EXESS_XSD_URI "double")); + : serd_new_literal(serd_substring(buf, r.count), + SERD_HAS_DATATYPE, + serd_string(EXESS_XSD_URI "double")); } SerdNode* @@ -601,8 +598,17 @@ serd_new_float(const float f) const ExessResult r = exess_write_float(f, sizeof(buf), buf); return r.status ? NULL - : serd_new_typed_literal(serd_substring(buf, r.count), - serd_string(EXESS_XSD_URI "float")); + : serd_new_literal(serd_substring(buf, r.count), + SERD_HAS_DATATYPE, + serd_string(EXESS_XSD_URI "float")); +} + +SerdNode* +serd_new_boolean(bool b) +{ + return serd_new_literal(b ? serd_string("true") : serd_string("false"), + SERD_HAS_DATATYPE, + serd_node_string_view(&serd_xsd_boolean.node)); } SerdNode* diff --git a/src/read_ntriples.c b/src/read_ntriples.c index bec59c13..6822b64f 100644 --- a/src/read_ntriples.c +++ b/src/read_ntriples.c @@ -190,24 +190,8 @@ read_IRI(SerdReader* const reader, SerdNode** const dest) SerdStatus read_character(SerdReader* const reader, SerdNode* const dest, const uint8_t c) { - if (!(c & 0x80)) { - switch (c) { - case 0xA: - case 0xD: - dest->flags |= SERD_HAS_NEWLINE; - break; - case '"': - case '\'': - dest->flags |= SERD_HAS_QUOTE; - break; - default: - break; - } - - return push_byte(reader, dest, c); - } - - return read_utf8_continuation(reader, dest, c); + return !(c & 0x80) ? push_byte(reader, dest, c) + : read_utf8_continuation(reader, dest, c); } SerdStatus @@ -423,10 +407,8 @@ read_ECHAR(SerdReader* const reader, SerdNode* const dest) case 'b': return (st = skip_byte(reader, 'b')) ? st : push_byte(reader, dest, '\b'); case 'n': - dest->flags |= SERD_HAS_NEWLINE; return (st = skip_byte(reader, 'n')) ? st : push_byte(reader, dest, '\n'); case 'r': - dest->flags |= SERD_HAS_NEWLINE; return (st = skip_byte(reader, 'r')) ? st : push_byte(reader, dest, '\r'); case 'f': return (st = skip_byte(reader, 'f')) ? st : push_byte(reader, dest, '\f'); diff --git a/src/read_turtle.c b/src/read_turtle.c index c3970a1e..22269741 100644 --- a/src/read_turtle.c +++ b/src/read_turtle.c @@ -115,7 +115,6 @@ read_STRING_LITERAL_LONG(SerdReader* const reader, push_byte(reader, ref, c); st = read_string_escape(reader, ref); } else { - ref->flags |= SERD_HAS_QUOTE; if (!(st = push_byte(reader, ref, c))) { st = read_character(reader, ref, (uint8_t)q2); } @@ -151,7 +150,10 @@ read_String(SerdReader* const reader, SerdNode* const node) return SERD_SUCCESS; } + // Long string skip_byte(reader, q3); + node->flags |= SERD_IS_LONG; + return read_STRING_LITERAL_LONG(reader, node, (uint8_t)q1); } diff --git a/src/string.c b/src/string.c index 8cc839bd..ed3149d0 100644 --- a/src/string.c +++ b/src/string.c @@ -1,16 +1,10 @@ // Copyright 2011-2020 David Robillard <d@drobilla.net> // SPDX-License-Identifier: ISC -#include "string_utils.h" - #include "serd/memory.h" -#include "serd/node.h" #include "serd/status.h" -#include "serd/string.h" -#include <assert.h> #include <stdlib.h> -#include <string.h> void serd_free(void* const ptr) @@ -68,50 +62,3 @@ serd_strerror(const SerdStatus status) return "Unknown error"; } - -static void -serd_update_flags(const char c, SerdNodeFlags* const flags) -{ - switch (c) { - case '\r': - case '\n': - *flags |= SERD_HAS_NEWLINE; - break; - case '"': - *flags |= SERD_HAS_QUOTE; - break; - default: - break; - } -} - -size_t -serd_substrlen(const char* const str, - const size_t len, - SerdNodeFlags* const flags) -{ - assert(flags); - - size_t i = 0; - *flags = 0; - for (; i < len && str[i]; ++i) { - serd_update_flags(str[i], flags); - } - - return i; -} - -size_t -serd_strlen(const char* const str, SerdNodeFlags* const flags) -{ - if (flags) { - size_t i = 0; - *flags = 0; - for (; str[i]; ++i) { - serd_update_flags(str[i], flags); - } - return i; - } - - return strlen(str); -} diff --git a/src/string_utils.h b/src/string_utils.h index 9de03fa0..2517b270 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -4,8 +4,6 @@ #ifndef SERD_SRC_STRING_UTILS_H #define SERD_SRC_STRING_UTILS_H -#include "serd/node.h" - #include <stdbool.h> #include <stddef.h> #include <stdint.h> @@ -97,9 +95,6 @@ is_windows_path(const char* path) (path[2] == '/' || path[2] == '\\'); } -size_t -serd_substrlen(const char* str, size_t len, SerdNodeFlags* flags); - static inline uint8_t hex_digit_value(const uint8_t c) { diff --git a/src/uri_utils.h b/src/uri_utils.h index 004129d2..76060d6a 100644 --- a/src/uri_utils.h +++ b/src/uri_utils.h @@ -4,10 +4,12 @@ #ifndef SERD_SRC_URI_UTILS_H #define SERD_SRC_URI_UTILS_H -#include "serd/attributes.h" - #include "string_utils.h" +#include "serd/attributes.h" +#include "serd/string_view.h" +#include "serd/uri.h" + #include <stdbool.h> #include <string.h> diff --git a/src/writer.c b/src/writer.c index 94c75625..329a29ad 100644 --- a/src/writer.c +++ b/src/writer.c @@ -778,8 +778,7 @@ write_literal(SerdWriter* const writer, } } - if (supports_abbrev(writer) && - (node->flags & (SERD_HAS_NEWLINE | SERD_HAS_QUOTE))) { + if (supports_abbrev(writer) && (node->flags & SERD_IS_LONG)) { TRY(st, esink("\"\"\"", 3, writer)); TRY(st, write_text(writer, WRITE_LONG_STRING, node_str, node->length)); TRY(st, esink("\"\"\"", 3, writer)); diff --git a/test/extra/good/manifest.ttl b/test/extra/good/manifest.ttl index bce7c564..7c356285 100644 --- a/test/extra/good/manifest.ttl +++ b/test/extra/good/manifest.ttl @@ -23,10 +23,15 @@ <#test-double> <#test-empty-path-base> <#test-eof-at-page-end> + <#test-escapes> <#test-id> <#test-list-in-blank> <#test-list-subject> + <#test-long-backspace-escape> + <#test-long-delete-escape> + <#test-long-form-feed-escape> <#test-long-utf8> + <#test-long-whitespace> <#test-no-spaces> <#test-non-curie-uri> <#test-prefix> @@ -140,6 +145,12 @@ mf:name "test-eof-at-page-end" ; mf:result <test-eof-at-page-end.nt> . +<#test-escapes> + a rdft:TestTurtleEval ; + mf:action <test-escapes.ttl> ; + mf:name "test-escapes" ; + mf:result <test-escapes.nt> . + <#test-id> a rdft:TestTurtleEval ; mf:action <test-id.ttl> ; @@ -158,12 +169,36 @@ mf:name "test-list-subject" ; mf:result <test-list-subject.nt> . +<#test-long-backspace-escape> + a rdft:TestTurtleEval ; + mf:action <test-long-backspace-escape.ttl> ; + mf:name "test-long-backspace-escape" ; + mf:result <test-long-backspace-escape.nt> . + +<#test-long-delete-escape> + a rdft:TestTurtleEval ; + mf:action <test-long-delete-escape.ttl> ; + mf:name "test-long-delete-escape" ; + mf:result <test-long-delete-escape.nt> . + +<#test-long-form-feed-escape> + a rdft:TestTurtleEval ; + mf:action <test-long-form-feed-escape.ttl> ; + mf:name "test-long-form-feed-escape" ; + mf:result <test-long-form-feed-escape.nt> . + <#test-long-utf8> a rdft:TestTurtleEval ; mf:action <test-long-utf8.ttl> ; mf:name "test-long-utf8" ; mf:result <test-long-utf8.nt> . +<#test-long-whitespace> + a rdft:TestTurtleEval ; + mf:action <test-long-whitespace.ttl> ; + mf:name "test-long-whitespace" ; + mf:result <test-long-whitespace.nt> . + <#test-no-spaces> a rdft:TestTurtleEval ; mf:action <test-no-spaces.ttl> ; diff --git a/test/extra/perfect/test-escapes.nt b/test/extra/good/test-escapes.nt index 2780d976..2780d976 100644 --- a/test/extra/perfect/test-escapes.nt +++ b/test/extra/good/test-escapes.nt diff --git a/test/extra/perfect/test-escapes.ttl b/test/extra/good/test-escapes.ttl index b8fcce7a..b8fcce7a 100644 --- a/test/extra/perfect/test-escapes.ttl +++ b/test/extra/good/test-escapes.ttl diff --git a/test/extra/good/test-long-backspace-escape.nt b/test/extra/good/test-long-backspace-escape.nt new file mode 100644 index 00000000..74e8e277 --- /dev/null +++ b/test/extra/good/test-long-backspace-escape.nt @@ -0,0 +1 @@ +<http://example.org/eg#s> <http://example.org/eg#p> "long\n\u0008\n" . diff --git a/test/extra/good/test-long-backspace-escape.ttl b/test/extra/good/test-long-backspace-escape.ttl new file mode 100644 index 00000000..c4b604cb --- /dev/null +++ b/test/extra/good/test-long-backspace-escape.ttl @@ -0,0 +1,4 @@ +<http://example.org/eg#s> + <http://example.org/eg#p> """long +\b +""" . diff --git a/test/extra/good/test-long-delete-escape.nt b/test/extra/good/test-long-delete-escape.nt new file mode 100644 index 00000000..25ed9ab3 --- /dev/null +++ b/test/extra/good/test-long-delete-escape.nt @@ -0,0 +1 @@ +<http://example.org/eg#s> <http://example.org/eg#p> "long\n\u007F\n" . diff --git a/test/extra/good/test-long-delete-escape.ttl b/test/extra/good/test-long-delete-escape.ttl new file mode 100644 index 00000000..11bc2b21 --- /dev/null +++ b/test/extra/good/test-long-delete-escape.ttl @@ -0,0 +1,4 @@ +<http://example.org/eg#s> + <http://example.org/eg#p> """long +\u007F +""" . diff --git a/test/extra/good/test-long-form-feed-escape.nt b/test/extra/good/test-long-form-feed-escape.nt new file mode 100644 index 00000000..d16c6da9 --- /dev/null +++ b/test/extra/good/test-long-form-feed-escape.nt @@ -0,0 +1 @@ +<http://example.org/thing> <http://example.org/label> "long\n\u000C\n" . diff --git a/test/extra/good/test-long-form-feed-escape.ttl b/test/extra/good/test-long-form-feed-escape.ttl new file mode 100644 index 00000000..f62ec19e --- /dev/null +++ b/test/extra/good/test-long-form-feed-escape.ttl @@ -0,0 +1,4 @@ +<http://example.org/thing> + <http://example.org/label> """long + +""" . diff --git a/test/extra/good/test-long-whitespace.nt b/test/extra/good/test-long-whitespace.nt new file mode 100644 index 00000000..09664b37 --- /dev/null +++ b/test/extra/good/test-long-whitespace.nt @@ -0,0 +1,2 @@ +<http://example.org/eg#a> <http://example.org/eg#b> "\nthis \ris a \U00015678long\t\nliteral\uABCD\n" . +<http://example.org/eg#d> <http://example.org/eg#e> "\tThis \uABCDis\r \U00015678another\n\none\n" . diff --git a/test/extra/perfect/test-long-whitespace.ttl b/test/extra/good/test-long-whitespace.ttl index 9c3f946c..9c3f946c 100644 --- a/test/extra/perfect/test-long-whitespace.ttl +++ b/test/extra/good/test-long-whitespace.ttl diff --git a/test/extra/perfect/manifest.ttl b/test/extra/perfect/manifest.ttl index 572f55f2..66dd7547 100644 --- a/test/extra/perfect/manifest.ttl +++ b/test/extra/perfect/manifest.ttl @@ -10,13 +10,11 @@ <#test-decimal> <#test-delete-escape> <#test-empty> - <#test-escapes> <#test-exact-uri> <#test-form-feed-escape> <#test-integer> <#test-lang> <#test-list> - <#test-long-whitespace> <#test-triple> <#test-uri-escape> <#test-uri-query> @@ -48,12 +46,6 @@ mf:name "test-empty" ; mf:result <test-empty.nt> . -<#test-escapes> - a rdft:TestTurtleEval ; - mf:action <test-escapes.ttl> ; - mf:name "test-escapes" ; - mf:result <test-escapes.nt> . - <#test-exact-uri> a rdft:TestTurtleEval ; mf:action <test-exact-uri.ttl> ; @@ -84,12 +76,6 @@ mf:name "test-list" ; mf:result <test-list.nt> . -<#test-long-whitespace> - a rdft:TestTurtleEval ; - mf:action <test-long-whitespace.ttl> ; - mf:name "test-long-whitespace" ; - mf:result <test-long-whitespace.nt> . - <#test-triple> a rdft:TestTurtleEval ; mf:action <test-triple.ttl> ; diff --git a/test/extra/perfect/test-backspace-escape.nt b/test/extra/perfect/test-backspace-escape.nt index dd1fda93..f0b894a2 100644 --- a/test/extra/perfect/test-backspace-escape.nt +++ b/test/extra/perfect/test-backspace-escape.nt @@ -1,3 +1,2 @@ <http://example.org/eg#s> <http://example.org/eg#p> "\u0008 first" . <http://example.org/eg#s> <http://example.org/eg#p> "last \u0008" . -<http://example.org/eg#s> <http://example.org/eg#p> "long\n\u0008\n" . diff --git a/test/extra/perfect/test-backspace-escape.ttl b/test/extra/perfect/test-backspace-escape.ttl index a92a9252..ab9c8314 100644 --- a/test/extra/perfect/test-backspace-escape.ttl +++ b/test/extra/perfect/test-backspace-escape.ttl @@ -1,6 +1,3 @@ <http://example.org/eg#s> <http://example.org/eg#p> "\b first" , - "last \b" , - """long -\b -""" . + "last \b" . diff --git a/test/extra/perfect/test-delete-escape.nt b/test/extra/perfect/test-delete-escape.nt index 1fd30590..f3d92525 100644 --- a/test/extra/perfect/test-delete-escape.nt +++ b/test/extra/perfect/test-delete-escape.nt @@ -1,3 +1,2 @@ <http://example.org/eg#s> <http://example.org/eg#p> "last \u007F" . -<http://example.org/eg#s> <http://example.org/eg#p> "long\n\u007F\n" . <http://example.org/eg#s> <http://example.org/eg#p> "\u007F first" . diff --git a/test/extra/perfect/test-delete-escape.ttl b/test/extra/perfect/test-delete-escape.ttl index a2711920..019fc47a 100644 --- a/test/extra/perfect/test-delete-escape.ttl +++ b/test/extra/perfect/test-delete-escape.ttl @@ -1,6 +1,3 @@ <http://example.org/eg#s> <http://example.org/eg#p> "last \u007F" , - """long -\u007F -""" , "\u007F first" . diff --git a/test/extra/perfect/test-form-feed-escape.nt b/test/extra/perfect/test-form-feed-escape.nt index 7848ec5b..6606fb07 100644 --- a/test/extra/perfect/test-form-feed-escape.nt +++ b/test/extra/perfect/test-form-feed-escape.nt @@ -1,3 +1,2 @@ <http://example.org/eg#s> <http://example.org/eg#p> "\u000C first" . <http://example.org/eg#s> <http://example.org/eg#p> "last \u000C" . -<http://example.org/eg#s> <http://example.org/eg#p> "long\n\u000C\n" . diff --git a/test/extra/perfect/test-form-feed-escape.ttl b/test/extra/perfect/test-form-feed-escape.ttl index 73c78a5e..0b38971e 100644 --- a/test/extra/perfect/test-form-feed-escape.ttl +++ b/test/extra/perfect/test-form-feed-escape.ttl @@ -1,6 +1,3 @@ <http://example.org/eg#s> <http://example.org/eg#p> "\f first" , - "last \f" , - """long - -""" . + "last \f" . diff --git a/test/extra/perfect/test-long-whitespace.nt b/test/extra/perfect/test-long-whitespace.nt deleted file mode 100644 index fca880d1..00000000 --- a/test/extra/perfect/test-long-whitespace.nt +++ /dev/null @@ -1,2 +0,0 @@ -<http://example.org/eg#a> <http://example.org/eg#b> "\nthis \ris a 𕙸long\t\nliteralꯍ\n" . -<http://example.org/eg#d> <http://example.org/eg#e> "\tThis ꯍis\r 𕙸another\n\none\n" . diff --git a/test/test_env.c b/test/test_env.c index d2c80343..d45f2ef6 100644 --- a/test/test_env.c +++ b/test/test_env.c @@ -161,8 +161,10 @@ test_expand_bad_uri_datatype(void) { const SerdStringView type = serd_string("Type"); - SerdNode* const typed = serd_new_typed_literal(serd_string("data"), type); - SerdEnv* const env = serd_env_new(serd_empty_string()); + SerdNode* const typed = + serd_new_literal(serd_string("data"), SERD_HAS_DATATYPE, type); + + SerdEnv* const env = serd_env_new(serd_empty_string()); assert(!serd_env_expand(env, typed)); diff --git a/test/test_node.c b/test/test_node.c index 7d37cfe5..7bdc83ec 100644 --- a/test/test_node.c +++ b/test/test_node.c @@ -75,8 +75,8 @@ check_get_boolean(const char* string, const char* datatype_uri, const bool expected) { - SerdNode* const node = - serd_new_typed_literal(serd_string(string), serd_string(datatype_uri)); + SerdNode* const node = serd_new_literal( + serd_string(string), SERD_HAS_DATATYPE, serd_string(datatype_uri)); assert(node); assert(serd_get_boolean(node) == expected); @@ -162,8 +162,8 @@ check_get_double(const char* string, const char* datatype_uri, const double expected) { - SerdNode* const node = - serd_new_typed_literal(serd_string(string), serd_string(datatype_uri)); + SerdNode* const node = serd_new_literal( + serd_string(string), SERD_HAS_DATATYPE, serd_string(datatype_uri)); assert(node); @@ -191,8 +191,8 @@ test_get_double(void) assert(isnan(serd_get_double(nan))); serd_node_free(nan); - SerdNode* const invalid = - serd_new_typed_literal(serd_string("!invalid"), serd_string(NS_XSD "long")); + SerdNode* const invalid = serd_new_literal( + serd_string("!invalid"), SERD_HAS_DATATYPE, serd_string(NS_XSD "long")); assert(isnan(serd_get_double(invalid))); serd_node_free(invalid); @@ -235,8 +235,8 @@ check_get_float(const char* string, const char* datatype_uri, const float expected) { - SerdNode* const node = - serd_new_typed_literal(serd_string(string), serd_string(datatype_uri)); + SerdNode* const node = serd_new_literal( + serd_string(string), SERD_HAS_DATATYPE, serd_string(datatype_uri)); assert(node); @@ -262,8 +262,8 @@ test_get_float(void) assert(isnan(serd_get_float(nan))); serd_node_free(nan); - SerdNode* const invalid = - serd_new_typed_literal(serd_string("!invalid"), serd_string(NS_XSD "long")); + SerdNode* const invalid = serd_new_literal( + serd_string("!invalid"), SERD_HAS_DATATYPE, serd_string(NS_XSD "long")); assert(isnan(serd_get_double(invalid))); @@ -300,8 +300,8 @@ check_get_integer(const char* string, const char* datatype_uri, const int64_t expected) { - SerdNode* const node = - serd_new_typed_literal(serd_string(string), serd_string(datatype_uri)); + SerdNode* const node = serd_new_literal( + serd_string(string), SERD_HAS_DATATYPE, serd_string(datatype_uri)); assert(node); assert(serd_get_integer(node) == expected); @@ -364,8 +364,8 @@ check_get_base64(const char* string, const char* datatype_uri, const char* expected) { - SerdNode* const node = - serd_new_typed_literal(serd_string(string), serd_string(datatype_uri)); + SerdNode* const node = serd_new_literal( + serd_string(string), SERD_HAS_DATATYPE, serd_string(datatype_uri)); assert(node); @@ -390,8 +390,8 @@ test_get_base64(void) check_get_base64("Zm9vYg==", NS_XSD "base64Binary", "foob"); check_get_base64(" \f\n\r\t\vZm9v \f\n\r\t\v", NS_XSD "base64Binary", "foo"); - SerdNode* const node = serd_new_typed_literal( - serd_string("Zm9v"), serd_string(NS_XSD "base64Binary")); + SerdNode* const node = serd_new_literal( + serd_string("Zm9v"), SERD_HAS_DATATYPE, serd_string(NS_XSD "base64Binary")); char small[2] = {0}; const SerdWriteResult r = serd_get_base64(node, sizeof(small), small); @@ -429,7 +429,7 @@ test_node_from_string(void) { SerdNode* const hello = serd_new_string(serd_string("hello\"")); assert(serd_node_length(hello) == 6); - assert(serd_node_flags(hello) == SERD_HAS_QUOTE); + assert(!serd_node_flags(hello)); assert(!strncmp(serd_node_string(hello), "hello\"", 6)); assert(!strcmp(serd_node_string_view(hello).data, "hello\"")); assert(serd_node_string_view(hello).length == 6); @@ -448,7 +448,7 @@ test_node_from_substring(void) { SerdNode* const a_b = serd_new_string(serd_substring("a\"bc", 3)); assert(serd_node_length(a_b) == 3); - assert(serd_node_flags(a_b) == SERD_HAS_QUOTE); + assert(!serd_node_flags(a_b)); assert(strlen(serd_node_string(a_b)) == 3); assert(!strncmp(serd_node_string(a_b), "a\"b", 3)); serd_node_free(a_b); @@ -467,46 +467,54 @@ check_copy_equals(const SerdNode* const node) static void test_literal(void) { - SerdNode* hello2 = serd_new_string(serd_string("hello\"")); + const SerdStringView hello_str = serd_string("hello"); + const SerdStringView empty_str = serd_empty_string(); - assert(serd_node_length(hello2) == 6 && - serd_node_flags(hello2) == SERD_HAS_QUOTE && - !strcmp(serd_node_string(hello2), "hello\"")); + assert(!serd_new_literal( + hello_str, SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE, serd_string("whatever"))); - check_copy_equals(hello2); + assert(!serd_new_literal(hello_str, SERD_HAS_DATATYPE, empty_str)); + assert(!serd_new_literal(hello_str, SERD_HAS_LANGUAGE, empty_str)); + + assert(!serd_new_literal(hello_str, SERD_HAS_DATATYPE, serd_string("Type"))); + assert(!serd_new_literal(hello_str, SERD_HAS_DATATYPE, serd_string("de"))); - SerdNode* hello3 = - serd_new_plain_literal(serd_string("hello\""), serd_empty_string()); + assert(!serd_new_literal(hello_str, SERD_HAS_LANGUAGE, serd_string("3n"))); + assert(!serd_new_literal(hello_str, SERD_HAS_LANGUAGE, serd_string("d3"))); + assert(!serd_new_literal(hello_str, SERD_HAS_LANGUAGE, serd_string("d3"))); + assert(!serd_new_literal(hello_str, SERD_HAS_LANGUAGE, serd_string("en-!"))); - assert(serd_node_equals(hello2, hello3)); + SerdNode* hello2 = serd_new_string(serd_string("hello\"")); - SerdNode* hello4 = - serd_new_typed_literal(serd_string("hello\""), serd_empty_string()); + assert(serd_node_length(hello2) == 6 && + !strcmp(serd_node_string(hello2), "hello\"")); - assert(!serd_new_typed_literal(serd_string("plain"), - serd_string(NS_RDF "langString"))); + check_copy_equals(hello2); - assert(serd_node_equals(hello4, hello2)); + assert(!serd_new_literal( + serd_string("plain"), SERD_HAS_DATATYPE, serd_string(NS_RDF "langString"))); - serd_node_free(hello4); - serd_node_free(hello3); serd_node_free(hello2); - const char* lang_lit_str = "\"Hello\"@en"; - SerdNode* sliced_lang_lit = serd_new_plain_literal( - serd_substring(lang_lit_str + 1, 5), serd_substring(lang_lit_str + 8, 2)); + const char* lang_lit_str = "\"Hello\"@en-ca"; + SerdNode* sliced_lang_lit = + serd_new_literal(serd_substring(lang_lit_str + 1, 5), + SERD_HAS_LANGUAGE, + serd_substring(lang_lit_str + 8, 5)); assert(!strcmp(serd_node_string(sliced_lang_lit), "Hello")); const SerdNode* const lang = serd_node_language(sliced_lang_lit); assert(lang); - assert(!strcmp(serd_node_string(lang), "en")); + assert(!strcmp(serd_node_string(lang), "en-ca")); check_copy_equals(sliced_lang_lit); serd_node_free(sliced_lang_lit); - const char* type_lit_str = "\"Hallo\"^^<http://example.org/Greeting>"; - SerdNode* sliced_type_lit = serd_new_typed_literal( - serd_substring(type_lit_str + 1, 5), serd_substring(type_lit_str + 10, 27)); + const char* type_lit_str = "\"Hallo\"^^<http://example.org/Greeting>"; + SerdNode* sliced_type_lit = + serd_new_literal(serd_substring(type_lit_str + 1, 5), + SERD_HAS_DATATYPE, + serd_substring(type_lit_str + 10, 27)); assert(!strcmp(serd_node_string(sliced_type_lit), "Hallo")); @@ -514,11 +522,6 @@ test_literal(void) assert(datatype); assert(!strcmp(serd_node_string(datatype), "http://example.org/Greeting")); serd_node_free(sliced_type_lit); - - SerdNode* const plain_lit = - serd_new_plain_literal(serd_string("Plain"), serd_empty_string()); - assert(!strcmp(serd_node_string(plain_lit), "Plain")); - serd_node_free(plain_lit); } static void @@ -537,17 +540,17 @@ test_compare(void) SerdNode* xsd_short = serd_new_uri(serd_string("http://www.w3.org/2001/XMLSchema#short")); - SerdNode* angst = - serd_new_plain_literal(serd_string("angst"), serd_empty_string()); + SerdNode* angst = serd_new_string(serd_string("angst")); - SerdNode* angst_de = - serd_new_plain_literal(serd_string("angst"), serd_string("de")); + SerdNode* angst_de = serd_new_literal( + serd_string("angst"), SERD_HAS_LANGUAGE, serd_string("de")); - SerdNode* angst_en = - serd_new_plain_literal(serd_string("angst"), serd_string("en")); + assert(angst_de); + SerdNode* angst_en = serd_new_literal( + serd_string("angst"), SERD_HAS_LANGUAGE, serd_string("en")); - SerdNode* hallo = - serd_new_plain_literal(serd_string("Hallo"), serd_string("de")); + SerdNode* hallo = serd_new_literal( + serd_string("Hallo"), SERD_HAS_LANGUAGE, serd_string("de")); SerdNode* hello = serd_new_string(serd_string("Hello")); SerdNode* universe = serd_new_string(serd_string("Universe")); @@ -555,11 +558,14 @@ test_compare(void) SerdNode* blank = serd_new_blank(serd_string("b1")); SerdNode* uri = serd_new_uri(serd_string("http://example.org/")); - SerdNode* aardvark = serd_new_typed_literal( - serd_string("alex"), serd_string("http://example.org/Aardvark")); + SerdNode* aardvark = + serd_new_literal(serd_string("alex"), + SERD_HAS_DATATYPE, + serd_string("http://example.org/Aardvark")); - SerdNode* badger = serd_new_typed_literal( - serd_string("bobby"), serd_string("http://example.org/Badger")); + SerdNode* badger = serd_new_literal(serd_string("bobby"), + SERD_HAS_DATATYPE, + serd_string("http://example.org/Badger")); // Types are ordered according to their SerdNodeType (more or less arbitrary) assert(serd_node_compare(hello, uri) < 0); diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c index aa820fa6..df2c3957 100644 --- a/test/test_reader_writer.c +++ b/test/test_reader_writer.c @@ -179,8 +179,11 @@ test_writer(const char* const path) const SerdStringView en = serd_string("en"); SerdNode* const o = serd_new_string(serd_string("o")); - SerdNode* const t = serd_new_typed_literal(serd_string("t"), urn_Type); - SerdNode* const l = serd_new_plain_literal(serd_string("l"), en); + + SerdNode* const t = + serd_new_literal(serd_string("t"), SERD_HAS_DATATYPE, urn_Type); + + SerdNode* const l = serd_new_literal(serd_string("l"), SERD_HAS_LANGUAGE, en); const SerdNode* good[][3] = {{s, p, o}, {s, p, t}, {s, p, l}}; diff --git a/test/test_string.c b/test/test_string.c index b551b9fe..5205cc9c 100644 --- a/test/test_string.c +++ b/test/test_string.c @@ -3,28 +3,13 @@ #undef NDEBUG -#include "serd/node.h" #include "serd/status.h" -#include "serd/string.h" #include "zix/attributes.h" #include <assert.h> -#include <stdint.h> -#include <stdio.h> #include <string.h> static void -test_strlen(void) -{ - const uint8_t str[] = {'"', '5', 0xE2, 0x82, 0xAC, '"', '\n', 0}; - - SerdNodeFlags flags = 0; - size_t n_bytes = serd_strlen((const char*)str, &flags); - assert(n_bytes == 7 && flags == (SERD_HAS_QUOTE | SERD_HAS_NEWLINE)); - assert(serd_strlen((const char*)str, NULL) == 7); -} - -static void test_strerror(void) { const char* msg = serd_strerror(SERD_SUCCESS); @@ -41,9 +26,7 @@ test_strerror(void) ZIX_PURE_FUNC int main(void) { - test_strlen(); test_strerror(); - printf("Success\n"); return 0; } diff --git a/test/test_writer.c b/test/test_writer.c index 2a852562..9ce69c55 100644 --- a/test/test_writer.c +++ b/test/test_writer.c @@ -110,8 +110,10 @@ test_write_long_literal(void) SerdNode* s = serd_new_uri(serd_string("http://example.org/s")); SerdNode* p = serd_new_uri(serd_string("http://example.org/p")); - SerdNode* o = serd_new_string(serd_string("hello \"\"\"world\"\"\"!")); + SerdNode* o = serd_new_literal( + serd_string("hello \"\"\"world\"\"\"!"), SERD_IS_LONG, serd_empty_string()); + assert(serd_node_flags(o) & SERD_IS_LONG); assert(!serd_sink_write(serd_writer_sink(writer), 0, s, p, o, NULL)); serd_node_free(o); |