diff options
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | include/serd/serd.h | 7 | ||||
-rw-r--r-- | src/env.c | 9 | ||||
-rw-r--r-- | src/node.c | 36 | ||||
-rw-r--r-- | src/reader.c | 10 | ||||
-rw-r--r-- | src/reader.h | 5 | ||||
-rw-r--r-- | src/string.c | 50 | ||||
-rw-r--r-- | src/string_utils.h | 5 | ||||
-rw-r--r-- | src/writer.c | 7 | ||||
-rw-r--r-- | test/test_node.c | 15 | ||||
-rw-r--r-- | test/test_string.c | 11 |
11 files changed, 55 insertions, 101 deletions
@@ -2,6 +2,7 @@ serd (1.1.1) unstable; urgency=medium * Remove SERD_DISABLE_DEPRECATED and SERD_DEPRECATED_BY * Remove serd_uri_to_path() + * Remove useless character counting from API -- David Robillard <d@drobilla.net> Wed, 13 Jul 2022 20:39:07 +0000 diff --git a/include/serd/serd.h b/include/serd/serd.h index 1ac38fcd..851dce1b 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -126,14 +126,12 @@ serd_strerror(SerdStatus status); /** Measure a UTF-8 string. - @return Length of `str` in characters (except NULL). + @return Length of `str` in bytes. @param str A null-terminated UTF-8 string. - @param n_bytes (Output) Set to the size of `str` in bytes (except NULL). @param flags (Output) Set to the applicable flags. */ SERD_API size_t serd_strlen(const uint8_t* SERD_NONNULL str, - size_t* SERD_NULLABLE n_bytes, SerdNodeFlags* SERD_NULLABLE flags); /** @@ -341,12 +339,11 @@ typedef enum { typedef struct { const uint8_t* SERD_NULLABLE buf; ///< Value string size_t n_bytes; ///< Size in bytes (excluding null) - size_t n_chars; ///< String length (excluding null) SerdNodeFlags flags; ///< Node flags (string properties) SerdType type; ///< Node type } SerdNode; -static const SerdNode SERD_NODE_NULL = {NULL, 0, 0, 0, SERD_NOTHING}; +static const SerdNode SERD_NODE_NULL = {NULL, 0, 0, SERD_NOTHING}; /** Make a (shallow) node from `str`. @@ -231,12 +231,10 @@ serd_env_expand_node(const SerdEnv* const env, const SerdNode* const node) case SERD_NOTHING: case SERD_LITERAL: break; - case SERD_URI: { SerdURI ignored; return serd_node_new_uri_from_node(node, &env->base_uri, &ignored); } - case SERD_CURIE: { SerdChunk prefix; SerdChunk suffix; @@ -245,16 +243,13 @@ serd_env_expand_node(const SerdEnv* const env, const SerdNode* const node) } const size_t len = prefix.len + suffix.len; uint8_t* buf = (uint8_t*)malloc(len + 1); - SerdNode ret = {buf, len, 0, 0, SERD_URI}; - snprintf((char*)buf, len + 1, "%s%s", prefix.buf, suffix.buf); - ret.n_chars = serd_strlen(buf, NULL, NULL); + SerdNode ret = {buf, len, 0, SERD_URI}; + snprintf((char*)buf, ret.n_bytes + 1, "%s%s", prefix.buf, suffix.buf); return ret; } - case SERD_BLANK: break; } - return SERD_NODE_NULL; } @@ -8,7 +8,6 @@ #include "serd/serd.h" -#include <assert.h> #include <float.h> #include <math.h> #include <stdbool.h> @@ -61,10 +60,9 @@ serd_node_from_string(const SerdType type, const uint8_t* const str) return SERD_NODE_NULL; } - SerdNodeFlags flags = 0; - size_t buf_n_bytes = 0; - const size_t buf_n_chars = serd_strlen(str, &buf_n_bytes, &flags); - SerdNode ret = {str, buf_n_bytes, buf_n_chars, flags, type}; + SerdNodeFlags flags = 0; + const size_t n_bytes = serd_strlen(str, &flags); + const SerdNode ret = {str, n_bytes, flags, type}; return ret; } @@ -77,11 +75,9 @@ serd_node_from_substring(const SerdType type, return SERD_NODE_NULL; } - SerdNodeFlags flags = 0; - size_t buf_n_bytes = 0; - const size_t buf_n_chars = serd_substrlen(str, len, &buf_n_bytes, &flags); - assert(buf_n_bytes <= len); - SerdNode ret = {str, buf_n_bytes, buf_n_chars, flags, type}; + SerdNodeFlags flags = 0; + const size_t n_bytes = serd_substrlen(str, len, &flags); + const SerdNode ret = {str, n_bytes, flags, type}; return ret; } @@ -104,7 +100,6 @@ serd_node_equals(const SerdNode* const a, const SerdNode* const b) { return (a == b) || (a->type == b->type && a->n_bytes == b->n_bytes && - a->n_chars == b->n_chars && ((a->buf == b->buf) || !memcmp((const char*)a->buf, (const char*)b->buf, a->n_bytes + 1))); } @@ -245,13 +240,12 @@ serd_node_new_uri(const SerdURI* const uri, const size_t len = serd_uri_string_length(&abs_uri); uint8_t* buf = (uint8_t*)malloc(len + 1); - SerdNode node = {buf, 0, 0, 0, SERD_URI}; + SerdNode node = {buf, len, 0, SERD_URI}; uint8_t* ptr = buf; const size_t actual_len = serd_uri_serialise(&abs_uri, string_sink, &ptr); buf[actual_len] = '\0'; node.n_bytes = actual_len; - node.n_chars = serd_strlen(buf, NULL, NULL); if (out) { serd_uri_parse(buf, out); // TODO: cleverly avoid double parse @@ -269,14 +263,13 @@ serd_node_new_relative_uri(const SerdURI* const uri, const size_t uri_len = serd_uri_string_length(uri); const size_t base_len = serd_uri_string_length(base); uint8_t* buf = (uint8_t*)malloc(uri_len + base_len + 1); - SerdNode node = {buf, 0, 0, 0, SERD_URI}; + SerdNode node = {buf, 0, 0, SERD_URI}; uint8_t* ptr = buf; const size_t actual_len = serd_uri_serialise_relative(uri, base, root, string_sink, &ptr); buf[actual_len] = '\0'; node.n_bytes = actual_len; - node.n_chars = serd_strlen(buf, NULL, NULL); if (out) { serd_uri_parse(buf, out); // TODO: cleverly avoid double parse @@ -302,7 +295,7 @@ serd_node_new_decimal(const double d, const unsigned frac_digits) const double abs_d = fabs(d); const unsigned int_digits = serd_digits(abs_d); char* buf = (char*)calloc(int_digits + frac_digits + 3, 1); - SerdNode node = {(const uint8_t*)buf, 0, 0, 0, SERD_LITERAL}; + SerdNode node = {(const uint8_t*)buf, 0, 0, SERD_LITERAL}; const double int_part = floor(abs_d); // Point s to decimal point location @@ -325,7 +318,7 @@ serd_node_new_decimal(const double d, const unsigned frac_digits) double frac_part = fabs(d - int_part); if (frac_part < DBL_EPSILON) { *s++ = '0'; - node.n_bytes = node.n_chars = (size_t)(s - buf); + node.n_bytes = (size_t)(s - buf); } else { uint64_t frac = (uint64_t)llround(frac_part * pow(10.0, (int)frac_digits)); s += frac_digits - 1; @@ -335,7 +328,7 @@ serd_node_new_decimal(const double d, const unsigned frac_digits) for (; i < frac_digits - 1 && !(frac % 10); ++i, --s, frac /= 10) { } - node.n_bytes = node.n_chars = (size_t)(s - buf) + 1U; + node.n_bytes = (size_t)(s - buf) + 1U; // Write digits from last trailing zero to decimal point for (; i < frac_digits; ++i) { @@ -353,7 +346,7 @@ serd_node_new_integer(const int64_t i) uint64_t abs_i = (uint64_t)((i < 0) ? -i : i); const unsigned digits = serd_digits((double)abs_i); char* buf = (char*)calloc(digits + 2, 1); - SerdNode node = {(const uint8_t*)buf, 0, 0, 0, SERD_LITERAL}; + SerdNode node = {(const uint8_t*)buf, 0, 0, SERD_LITERAL}; // Point s to the end char* s = buf + digits - 1; @@ -362,7 +355,7 @@ serd_node_new_integer(const int64_t i) ++s; } - node.n_bytes = node.n_chars = (size_t)(s - buf) + 1U; + node.n_bytes = (size_t)(s - buf) + 1U; // Write integer part (right to left) do { @@ -379,12 +372,11 @@ serd_node_new_blob(const void* const buf, { const size_t len = serd_base64_get_length(size, wrap_lines); uint8_t* str = (uint8_t*)calloc(len + 2, 1); - SerdNode node = {str, len, len, 0, SERD_LITERAL}; + SerdNode node = {str, len, 0, SERD_LITERAL}; if (serd_base64_encode(str, buf, size, wrap_lines)) { node.flags |= SERD_HAS_NEWLINE; } - return node; } diff --git a/src/reader.c b/src/reader.c index 5d87216d..0aa468a2 100644 --- a/src/reader.c +++ b/src/reader.c @@ -33,7 +33,7 @@ set_blank_id(SerdReader* const reader, const Ref ref, const size_t buf_size) { SerdNode* node = deref(reader, ref); const char* prefix = reader->bprefix ? (const char*)reader->bprefix : ""; - node->n_bytes = node->n_chars = (size_t)snprintf( + node->n_bytes = (size_t)snprintf( (char*)node->buf, buf_size, "%sb%u", prefix, reader->next_id++); } @@ -81,10 +81,10 @@ push_node_padded(SerdReader* const reader, &reader->stack, sizeof(SerdNode) + maxlen + 1, sizeof(SerdNode)); SerdNode* const node = (SerdNode*)mem; - node->n_bytes = node->n_chars = n_bytes; - node->flags = 0; - node->type = type; - node->buf = NULL; + node->n_bytes = n_bytes; + node->flags = 0; + node->type = type; + node->buf = NULL; uint8_t* buf = (uint8_t*)(node + 1); memcpy(buf, str, n_bytes + 1); diff --git a/src/reader.h b/src/reader.h index 9b558d1f..81edbc09 100644 --- a/src/reader.h +++ b/src/reader.h @@ -168,12 +168,7 @@ push_byte(SerdReader* reader, Ref ref, const int c) uint8_t* const s = (uint8_t*)serd_stack_push(&reader->stack, 1); SerdNode* const node = (SerdNode*)(reader->stack.buf + ref); - ++node->n_bytes; - if (!(c & 0x80)) { // Starts with 0 bit, start of new character - ++node->n_chars; - } - *(s - 1) = (uint8_t)c; *s = '\0'; return SERD_SUCCESS; diff --git a/src/string.c b/src/string.c index ecba3463..37d71c8b 100644 --- a/src/string.c +++ b/src/string.c @@ -5,9 +5,11 @@ #include "serd/serd.h" +#include <assert.h> #include <math.h> #include <stdint.h> #include <stdlib.h> +#include <string.h> void serd_free(void* const ptr) @@ -64,48 +66,32 @@ serd_update_flags(const uint8_t c, SerdNodeFlags* const flags) size_t serd_substrlen(const uint8_t* const str, const size_t len, - size_t* const n_bytes, SerdNodeFlags* const flags) { - size_t n_chars = 0; - size_t i = 0; - SerdNodeFlags f = 0; + assert(flags); + + size_t i = 0; + *flags = 0; for (; i < len && str[i]; ++i) { - if ((str[i] & 0xC0) != 0x80) { // Start of new character - ++n_chars; - serd_update_flags(str[i], &f); - } - } - if (n_bytes) { - *n_bytes = i; + serd_update_flags(str[i], flags); } - if (flags) { - *flags = f; - } - return n_chars; + + return i; } size_t -serd_strlen(const uint8_t* const str, - size_t* const n_bytes, - SerdNodeFlags* const flags) +serd_strlen(const uint8_t* const str, SerdNodeFlags* const flags) { - size_t n_chars = 0; - size_t i = 0; - SerdNodeFlags f = 0; - for (; str[i]; ++i) { - if ((str[i] & 0xC0) != 0x80) { // Start of new character - ++n_chars; - serd_update_flags(str[i], &f); - } - } - if (n_bytes) { - *n_bytes = i; - } if (flags) { - *flags = f; + size_t i = 0; + *flags = 0; + for (; str[i]; ++i) { + serd_update_flags(str[i], flags); + } + return i; } - return n_chars; + + return strlen((const char*)str); } static double diff --git a/src/string_utils.h b/src/string_utils.h index 7770e1eb..c6b85ac7 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -84,10 +84,7 @@ is_windows_path(const uint8_t* path) } size_t -serd_substrlen(const uint8_t* str, - size_t len, - size_t* n_bytes, - SerdNodeFlags* flags); +serd_substrlen(const uint8_t* str, size_t len, SerdNodeFlags* flags); static inline uint8_t hex_digit_value(const uint8_t c) diff --git a/src/writer.c b/src/writer.c index 2a6d7c31..c72ce86b 100644 --- a/src/writer.c +++ b/src/writer.c @@ -43,9 +43,9 @@ typedef struct { } WriteContext; static const WriteContext WRITE_CONTEXT_NULL = {CTX_NAMED, - {0, 0, 0, 0, SERD_NOTHING}, - {0, 0, 0, 0, SERD_NOTHING}, - {0, 0, 0, 0, SERD_NOTHING}, + {0, 0, 0, SERD_NOTHING}, + {0, 0, 0, SERD_NOTHING}, + {0, 0, 0, SERD_NOTHING}, 0U, 0U}; @@ -192,7 +192,6 @@ copy_node(SerdNode* dst, const SerdNode* src) if (new_buf) { dst->buf = new_buf; dst->n_bytes = src->n_bytes; - dst->n_chars = src->n_chars; dst->flags = src->flags; dst->type = src->type; memcpy((char*)dst->buf, src->buf, new_size); diff --git a/test/test_node.c b/test/test_node.c index 67958cc5..1fc7f0bd 100644 --- a/test/test_node.c +++ b/test/test_node.c @@ -86,7 +86,7 @@ test_double_to_node(void) : ((const char*)node.buf == dbl_test_strs[i]); assert(pass); const size_t len = node.buf ? strlen((const char*)node.buf) : 0; - assert(node.n_bytes == len && node.n_chars == len); + assert(node.n_bytes == len); serd_node_free(&node); } } @@ -105,7 +105,7 @@ test_integer_to_node(void) SerdNode node = serd_node_new_integer(int_test_nums[i]); assert(!strcmp((const char*)node.buf, (const char*)int_test_strs[i])); const size_t len = strlen((const char*)node.buf); - assert(node.n_bytes == len && node.n_chars == len); + assert(node.n_bytes == len); serd_node_free(&node); } @@ -123,7 +123,6 @@ test_blob_to_node(void) SerdNode blob = serd_node_new_blob(data, size, size % 5); - assert(blob.n_bytes == blob.n_chars); assert(blob.n_bytes == strlen((const char*)blob.buf)); size_t out_size = 0; @@ -163,8 +162,7 @@ test_node_from_string(void) SerdNode node = serd_node_from_string(SERD_LITERAL, (const uint8_t*)"hello\""); - assert(node.n_bytes == 6 && node.n_chars == 6 && - node.flags == SERD_HAS_QUOTE && + assert(node.n_bytes == 6 && node.flags == SERD_HAS_QUOTE && !strcmp((const char*)node.buf, "hello\"")); node = serd_node_from_string(SERD_URI, NULL); @@ -175,15 +173,14 @@ static void test_node_from_substring(void) { SerdNode empty = serd_node_from_substring(SERD_LITERAL, NULL, 32); - assert(!empty.buf && !empty.n_bytes && !empty.n_chars && !empty.flags && - !empty.type); + assert(!empty.buf && !empty.n_bytes && !empty.flags && !empty.type); SerdNode a_b = serd_node_from_substring(SERD_LITERAL, USTR("a\"bc"), 3); - assert(a_b.n_bytes == 3 && a_b.n_chars == 3 && a_b.flags == SERD_HAS_QUOTE && + assert(a_b.n_bytes == 3 && a_b.flags == SERD_HAS_QUOTE && !strncmp((const char*)a_b.buf, "a\"b", 3)); a_b = serd_node_from_substring(SERD_LITERAL, USTR("a\"bc"), 10); - assert(a_b.n_bytes == 4 && a_b.n_chars == 4 && a_b.flags == SERD_HAS_QUOTE && + assert(a_b.n_bytes == 4 && a_b.flags == SERD_HAS_QUOTE && !strncmp((const char*)a_b.buf, "a\"bc", 4)); } diff --git a/test/test_string.c b/test/test_string.c index 6767e5ae..85dccd62 100644 --- a/test/test_string.c +++ b/test/test_string.c @@ -15,15 +15,10 @@ test_strlen(void) { const uint8_t str[] = {'"', '5', 0xE2, 0x82, 0xAC, '"', '\n', 0}; - size_t n_bytes = 0; SerdNodeFlags flags = 0; - size_t len = serd_strlen(str, &n_bytes, &flags); - assert(len == 5 && n_bytes == 7 && - flags == (SERD_HAS_QUOTE | SERD_HAS_NEWLINE)); - len = serd_strlen(str, NULL, &flags); - assert(len == 5); - - assert(serd_strlen(str, &n_bytes, NULL) == 5); + size_t n_bytes = serd_strlen(str, &flags); + assert(n_bytes == 7 && flags == (SERD_HAS_QUOTE | SERD_HAS_NEWLINE)); + assert(serd_strlen(str, NULL) == 7); } static void |