diff options
author | David Robillard <d@drobilla.net> | 2021-07-22 15:26:22 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:08 -0500 |
commit | 5e4538756d601e6a941c5290777af95ea8848e1a (patch) | |
tree | 9868e188a48a528e9908fcf695147f75790c3a56 /src/node.c | |
parent | 64024d0fa6a6dc048b2b846738846da597025f56 (diff) | |
download | serd-5e4538756d601e6a941c5290777af95ea8848e1a.tar.gz serd-5e4538756d601e6a941c5290777af95ea8848e1a.tar.bz2 serd-5e4538756d601e6a941c5290777af95ea8848e1a.zip |
[WIP] Preserve long or short quoting from input documents
Diffstat (limited to 'src/node.c')
-rw-r--r-- | src/node.c | 148 |
1 files changed, 77 insertions, 71 deletions
@@ -11,7 +11,6 @@
 #include "serd/buffer.h"
 #include "serd/node.h"
 #include "serd/status.h"
-#include "serd/string.h"
 #include "serd/string_view.h"
 #include "serd/uri.h"
 #include "serd/write_result.h"
@@ -195,88 +194,92 @@ serd_new_token(const SerdNodeType type, const SerdStringView str)
 SerdNode*
 serd_new_string(const SerdStringView str)
 {
-  SerdNodeFlags flags = 0;
-  const size_t length = serd_substrlen(str.data, str.length, &flags);
-  SerdNode* node = serd_node_malloc(length, flags, SERD_LITERAL);
-
-  memcpy(serd_node_buffer(node), str.data, str.length);
-  node->length = length;
-
-  serd_node_check_padding(node);
-  return node;
-}
-
-/// Internal pre-measured implementation of serd_new_plain_literal
-static SerdNode*
-serd_new_plain_literal_i(const SerdStringView str,
-                         SerdNodeFlags flags,
-                         const SerdStringView lang)
-{
-  assert(str.length);
-  assert(lang.length);
-
-  flags |= SERD_HAS_LANGUAGE;
-
-  const size_t len = serd_node_pad_length(str.length);
-  const size_t total_len = len + sizeof(SerdNode) + lang.length;
+  SerdNodeFlags flags = 0U;
+  SerdNode* node = serd_node_malloc(str.length, flags, SERD_LITERAL);
 
-  SerdNode* node = serd_node_malloc(total_len, flags, SERD_LITERAL);
-  memcpy(serd_node_buffer(node), str.data, str.length);
-  node->length = str.length;
+  if (node) {
+    if (str.data && str.length) {
+      memcpy(serd_node_buffer(node), str.data, str.length);
+    }
 
-  SerdNode* lang_node = node + 1 + (len / sizeof(SerdNode));
-  lang_node->type = SERD_LITERAL;
-  lang_node->length = lang.length;
-  memcpy(serd_node_buffer(lang_node), lang.data, lang.length);
-  serd_node_check_padding(lang_node);
+    node->length = str.length;
+    serd_node_check_padding(node);
+  }
 
-  serd_node_check_padding(node);
   return node;
 }
 
-SerdNode*
-serd_new_plain_literal(const SerdStringView str, const SerdStringView lang)
+ZIX_PURE_FUNC static bool
+is_langtag(const SerdStringView string)
 {
-  if (!lang.length) {
-    return serd_new_string(str);
+  // First character must be a letter
+  size_t i = 0;
+  if (!string.length || !is_alpha(string.data[i])) {
+    return false;
   }
 
-  SerdNodeFlags flags = 0;
-  serd_strlen(str.data, &flags);
+  // First component must be all letters
+  while (++i < string.length && string.data[i] && string.data[i] != '-') {
+    if (!is_alpha(string.data[i])) {
+      return false;
+    }
+  }
 
-  return serd_new_plain_literal_i(str, flags, lang);
+  // Following components can have letters and digits
+  while (i < string.length && string.data[i] == '-') {
+    while (++i < string.length && string.data[i] && string.data[i] != '-') {
+      const char c = string.data[i];
+      if (!is_alpha(c) && !is_digit(c)) {
+        return false;
+      }
+    }
+  }
+
+  return true;
 }
 
 SerdNode*
-serd_new_typed_literal(const SerdStringView str,
-                       const SerdStringView datatype_uri)
+serd_new_literal(const SerdStringView string,
+                 const SerdNodeFlags flags,
+                 const SerdStringView meta)
 {
-  if (!datatype_uri.length) {
-    return serd_new_string(str);
+  if (!(flags & (SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE))) {
+    SerdNode* node = serd_node_malloc(string.length, flags, SERD_LITERAL);
+
+    memcpy(serd_node_buffer(node), string.data, string.length);
+    node->length = string.length;
+    serd_node_check_padding(node);
+    return node;
   }
 
-  if (!strcmp(datatype_uri.data, NS_RDF "langString")) {
+  if ((flags & SERD_HAS_DATATYPE) && (flags & SERD_HAS_LANGUAGE)) {
     return NULL;
   }
 
-  SerdNodeFlags flags = 0U;
-  serd_strlen(str.data, &flags);
+  if (!meta.length) {
+    return NULL;
+  }
 
-  flags |= SERD_HAS_DATATYPE;
+  if (((flags & SERD_HAS_DATATYPE) &&
+       (!serd_uri_string_has_scheme(meta.data) ||
+        !strcmp(meta.data, NS_RDF "langString"))) ||
+      ((flags & SERD_HAS_LANGUAGE) && !is_langtag(meta))) {
+    return NULL;
+  }
 
-  const size_t len = serd_node_pad_length(str.length);
-  const size_t total_len = len + sizeof(SerdNode) + datatype_uri.length;
+  const size_t len = serd_node_pad_length(string.length);
+  const size_t meta_len = serd_node_pad_length(meta.length);
+  const size_t meta_size = sizeof(SerdNode) + meta_len;
 
-  SerdNode* node = serd_node_malloc(total_len, flags, SERD_LITERAL);
-  memcpy(serd_node_buffer(node), str.data, str.length);
-  node->length = str.length;
+  SerdNode* node = serd_node_malloc(len + meta_size, flags, SERD_LITERAL);
+  memcpy(serd_node_buffer(node), string.data, string.length);
+  node->length = string.length;
 
-  SerdNode* datatype_node = node + 1 + (len / sizeof(SerdNode));
-  datatype_node->length = datatype_uri.length;
-  datatype_node->type = SERD_URI;
-  memcpy(
-    serd_node_buffer(datatype_node), datatype_uri.data, datatype_uri.length);
-  serd_node_check_padding(datatype_node);
+  SerdNode* meta_node = node + 1U + (len / sizeof(SerdNode));
+  meta_node->length = meta.length;
+  meta_node->type = (flags & SERD_HAS_DATATYPE) ? SERD_URI : SERD_LITERAL;
+  memcpy(serd_node_buffer(meta_node), meta.data, meta.length);
+  serd_node_check_padding(meta_node);
 
   serd_node_check_padding(node);
   return node;
@@ -548,13 +551,6 @@ typedef size_t (*SerdWriteLiteralFunc)(const void* user_data,
                                        size_t buf_size,
                                        char* buf);
 
-SerdNode*
-serd_new_boolean(bool b)
-{
-  return serd_new_typed_literal(b ? serd_string("true") : serd_string("false"),
-                                serd_node_string_view(&serd_xsd_boolean.node));
-}
-
 static SerdNode*
 serd_new_custom_literal(const void* const user_data,
                         const size_t len,
@@ -589,8 +585,9 @@ serd_new_double(const double d)
   const ExessResult r = exess_write_double(d, sizeof(buf), buf);
 
   return r.status ? NULL
-                  : serd_new_typed_literal(serd_substring(buf, r.count),
-                                           serd_string(EXESS_XSD_URI "double"));
+                  : serd_new_literal(serd_substring(buf, r.count),
+                                     SERD_HAS_DATATYPE,
+                                     serd_string(EXESS_XSD_URI "double"));
 }
 
 SerdNode*
@@ -601,8 +598,17 @@ serd_new_float(const float f)
   const ExessResult r = exess_write_float(f, sizeof(buf), buf);
 
   return r.status ? NULL
-                  : serd_new_typed_literal(serd_substring(buf, r.count),
-                                           serd_string(EXESS_XSD_URI "float"));
+                  : serd_new_literal(serd_substring(buf, r.count),
+                                     SERD_HAS_DATATYPE,
+                                     serd_string(EXESS_XSD_URI "float"));
+}
+
+SerdNode*
+serd_new_boolean(bool b)
+{
+  return serd_new_literal(b ? serd_string("true") : serd_string("false"),
+                          SERD_HAS_DATATYPE,
+                          serd_node_string_view(&serd_xsd_boolean.node));
 }
 
 SerdNode*