From 4d535bbe0390ed4f03c611e433145c9e49cbf3ad Mon Sep 17 00:00:00 2001
From: David Robillard
Date: Sun, 9 Jul 2017 14:59:05 +0200
Subject: Add serd_node_from_substring()

This allows creating nodes in-place from substrings of other strings to allow
zero-copy serialization from existing delimited buffers.
---
 src/node.c          | 18 +++++++++++++++++-
 src/serd_internal.h |  6 ++++++
 src/string.c        | 47 ++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 61 insertions(+), 10 deletions(-)

(limited to 'src')

diff --git a/src/node.c b/src/node.c
index 42992917..10df10d9 100644
--- a/src/node.c
+++ b/src/node.c
@@ -46,6 +46,22 @@ serd_node_from_string(SerdType type, const uint8_t* str)
 	return ret;
 }
 
+SERD_API
+SerdNode
+serd_node_from_substring(SerdType type, const uint8_t* str, const size_t len)
+{
+	if (!str) {
+		return SERD_NODE_NULL;
+	}
+
+	uint32_t     flags       = 0;
+	size_t       buf_n_bytes = 0;
+	const size_t buf_n_chars = serd_substrlen(str, len, &buf_n_bytes, &flags);
+	assert(buf_n_bytes <= len);
+	SerdNode ret = { str, buf_n_bytes, buf_n_chars, flags, type };
+	return ret;
+}
+
 SERD_API
 SerdNode
 serd_node_copy(const SerdNode* node)
@@ -187,7 +203,7 @@ serd_node_new_file_uri(const uint8_t* path,
 		serd_uri_parse(chunk.buf, out);
 	}
 
-	return serd_node_from_string(SERD_URI, chunk.buf);
+	return serd_node_from_substring(SERD_URI, chunk.buf, chunk.len);
 }
 
 SERD_API
diff --git a/src/serd_internal.h b/src/serd_internal.h
index afbea5fb..55f6a6b6 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -325,6 +325,12 @@ is_windows_path(const uint8_t* path)
 		&& (path[2] == '/' || path[2] == '\\');
 }
 
+size_t
+serd_substrlen(const uint8_t* str,
+               const size_t   len,
+               size_t*        n_bytes,
+               SerdNodeFlags* flags);
+
 /* URI utilities */
 
 static inline bool
diff --git a/src/string.c b/src/string.c
index 9381015d..dedd0713 100644
--- a/src/string.c
+++ b/src/string.c
@@ -36,6 +36,42 @@ serd_strerror(SerdStatus status)
 	return (const uint8_t*)"Unknown error";  // never reached
 }
 
+static inline void
+serd_update_flags(const uint8_t c, SerdNodeFlags* const flags)
+{
+	switch (c) {
+	case '\r': case '\n':
+		*flags |= SERD_HAS_NEWLINE;
+		break;
+	case '"':
+		*flags |= SERD_HAS_QUOTE;
+	}
+}
+
+size_t
+serd_substrlen(const uint8_t* const str,
+               const size_t         len,
+               size_t* const        n_bytes,
+               SerdNodeFlags* const flags)
+{
+	size_t        n_chars = 0;
+	size_t        i       = 0;
+	SerdNodeFlags f       = 0;
+	for (; i < len && str[i]; ++i) {
+		if ((str[i] & 0xC0) != 0x80) {  // Start of new character
+			++n_chars;
+			serd_update_flags(str[i], &f);
+		}
+	}
+	if (n_bytes) {
+		*n_bytes = i;
+	}
+	if (flags) {
+		*flags = f;
+	}
+	return n_chars;
+}
+
 SERD_API
 size_t
 serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags)
@@ -44,16 +80,9 @@ serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags)
 	size_t        i       = 0;
 	SerdNodeFlags f       = 0;
 	for (; str[i]; ++i) {
-		if ((str[i] & 0xC0) != 0x80) {
-			// Does not start with `10', start of a new character
+		if ((str[i] & 0xC0) != 0x80) {  // Start of new character
 			++n_chars;
-			switch (str[i]) {
-			case '\r': case '\n':
-				f |= SERD_HAS_NEWLINE;
-				break;
-			case '"':
-				f |= SERD_HAS_QUOTE;
-			}
+			serd_update_flags(str[i], &f);
 		}
 	}
 	if (n_bytes) {
-- 
cgit v1.2.1