diff options
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | include/serd/serd.h | 7 | ||||
-rw-r--r-- | src/env.c | 9 | ||||
-rw-r--r-- | src/node.c | 36 | ||||
-rw-r--r-- | src/reader.c | 10 | ||||
-rw-r--r-- | src/reader.h | 5 | ||||
-rw-r--r-- | src/string.c | 48 | ||||
-rw-r--r-- | src/string_utils.h | 5 | ||||
-rw-r--r-- | src/writer.c | 7 | ||||
-rw-r--r-- | test/test_node.c | 15 | ||||
-rw-r--r-- | test/test_string.c | 11 |
11 files changed, 55 insertions, 99 deletions
@@ -2,6 +2,7 @@ serd (1.0.1) unstable; * Add SerdBuffer for mutable buffers to keep SerdChunk const-correct * Remove serd_uri_to_path() + * Remove useless character counting from API -- David Robillard <d@drobilla.net> Wed, 13 Jan 2021 13:29:44 +0000 diff --git a/include/serd/serd.h b/include/serd/serd.h index c60bf465..5d5803d0 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -187,7 +187,6 @@ typedef uint32_t SerdNodeFlags; typedef struct { const uint8_t* SERD_NULLABLE buf; ///< Value string size_t n_bytes; ///< Size in bytes (excluding null) - size_t n_chars; ///< String length (excluding null) SerdNodeFlags flags; ///< Node flags (string properties) SerdType type; ///< Node type } SerdNode; @@ -269,15 +268,13 @@ serd_strerror(SerdStatus status); /** Measure a UTF-8 string. - @return Length of `str` in characters (except NULL). + @return Length of `str` in bytes. @param str A null-terminated UTF-8 string. - @param n_bytes (Output) Set to the size of `str` in bytes (except NULL). @param flags (Output) Set to the applicable flags. */ SERD_API size_t serd_strlen(const uint8_t* SERD_NONNULL str, - size_t* SERD_NULLABLE n_bytes, SerdNodeFlags* SERD_NULLABLE flags); /** @@ -418,7 +415,7 @@ serd_uri_serialise_relative(const SerdURI* SERD_NONNULL uri, @{ */ -static const SerdNode SERD_NODE_NULL = {NULL, 0, 0, 0, SERD_NOTHING}; +static const SerdNode SERD_NODE_NULL = {NULL, 0, 0, SERD_NOTHING}; /** Make a (shallow) node from `str`. @@ -220,12 +220,10 @@ serd_env_expand_node(const SerdEnv* env, const SerdNode* node) case SERD_NOTHING: case SERD_LITERAL: break; - case SERD_URI: { SerdURI ignored; return serd_node_new_uri_from_node(node, &env->base_uri, &ignored); } - case SERD_CURIE: { SerdChunk prefix; SerdChunk suffix; @@ -234,16 +232,13 @@ serd_env_expand_node(const SerdEnv* env, const SerdNode* node) } const size_t len = prefix.len + suffix.len; uint8_t* buf = (uint8_t*)malloc(len + 1); - SerdNode ret = {buf, len, 0, 0, SERD_URI}; - snprintf((char*)buf, len + 1, "%s%s", prefix.buf, suffix.buf); - ret.n_chars = serd_strlen(buf, NULL, NULL); + SerdNode ret = {buf, len, 0, SERD_URI}; + snprintf((char*)buf, ret.n_bytes + 1, "%s%s", prefix.buf, suffix.buf); return ret; } - case SERD_BLANK: break; } - return SERD_NODE_NULL; } @@ -21,7 +21,6 @@ #include "serd/serd.h" -#include <assert.h> #include <float.h> #include <math.h> #include <stdbool.h> @@ -46,10 +45,9 @@ serd_node_from_string(SerdType type, const uint8_t* str) return SERD_NODE_NULL; } - SerdNodeFlags flags = 0; - size_t buf_n_bytes = 0; - const size_t buf_n_chars = serd_strlen(str, &buf_n_bytes, &flags); - SerdNode ret = {str, buf_n_bytes, buf_n_chars, flags, type}; + SerdNodeFlags flags = 0; + const size_t n_bytes = serd_strlen(str, &flags); + const SerdNode ret = {str, n_bytes, flags, type}; return ret; } @@ -60,11 +58,9 @@ serd_node_from_substring(SerdType type, const uint8_t* str, const size_t len) return SERD_NODE_NULL; } - SerdNodeFlags flags = 0; - size_t buf_n_bytes = 0; - const size_t buf_n_chars = serd_substrlen(str, len, &buf_n_bytes, &flags); - assert(buf_n_bytes <= len); - SerdNode ret = {str, buf_n_bytes, buf_n_chars, flags, type}; + SerdNodeFlags flags = 0; + const size_t n_bytes = serd_substrlen(str, len, &flags); + const SerdNode ret = {str, n_bytes, flags, type}; return ret; } @@ -87,7 +83,6 @@ serd_node_equals(const SerdNode* a, const SerdNode* b) { return (a == b) || (a->type == b->type && a->n_bytes == b->n_bytes && - a->n_chars == b->n_chars && ((a->buf == b->buf) || !memcmp((const char*)a->buf, (const char*)b->buf, a->n_bytes + 1))); } @@ -242,13 +237,12 @@ serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out) const size_t len = serd_uri_string_length(&abs_uri); uint8_t* buf = (uint8_t*)malloc(len + 1); - SerdNode node = {buf, 0, 0, 0, SERD_URI}; + SerdNode node = {buf, len, 0, SERD_URI}; uint8_t* ptr = buf; const size_t actual_len = serd_uri_serialise(&abs_uri, string_sink, &ptr); buf[actual_len] = '\0'; node.n_bytes = actual_len; - node.n_chars = serd_strlen(buf, NULL, NULL); if (out) { serd_uri_parse(buf, out); // TODO: cleverly avoid double parse @@ -266,14 +260,13 @@ serd_node_new_relative_uri(const SerdURI* uri, const size_t uri_len = serd_uri_string_length(uri); const size_t base_len = serd_uri_string_length(base); uint8_t* buf = (uint8_t*)malloc(uri_len + base_len + 1); - SerdNode node = {buf, 0, 0, 0, SERD_URI}; + SerdNode node = {buf, 0, 0, SERD_URI}; uint8_t* ptr = buf; const size_t actual_len = serd_uri_serialise_relative(uri, base, root, string_sink, &ptr); buf[actual_len] = '\0'; node.n_bytes = actual_len; - node.n_chars = serd_strlen(buf, NULL, NULL); if (out) { serd_uri_parse(buf, out); // TODO: cleverly avoid double parse @@ -299,7 +292,7 @@ serd_node_new_decimal(double d, unsigned frac_digits) const double abs_d = fabs(d); const unsigned int_digits = serd_digits(abs_d); char* buf = (char*)calloc(int_digits + frac_digits + 3, 1); - SerdNode node = {(const uint8_t*)buf, 0, 0, 0, SERD_LITERAL}; + SerdNode node = {(const uint8_t*)buf, 0, 0, SERD_LITERAL}; const double int_part = floor(abs_d); // Point s to decimal point location @@ -322,7 +315,7 @@ serd_node_new_decimal(double d, unsigned frac_digits) double frac_part = fabs(d - int_part); if (frac_part < DBL_EPSILON) { *s++ = '0'; - node.n_bytes = node.n_chars = (size_t)(s - buf); + node.n_bytes = (size_t)(s - buf); } else { uint64_t frac = (uint64_t)llround(frac_part * pow(10.0, (int)frac_digits)); s += frac_digits - 1; @@ -332,7 +325,7 @@ serd_node_new_decimal(double d, unsigned frac_digits) for (; i < frac_digits - 1 && !(frac % 10); ++i, --s, frac /= 10) { } - node.n_bytes = node.n_chars = (size_t)(s - buf) + 1u; + node.n_bytes = (size_t)(s - buf) + 1u; // Write digits from last trailing zero to decimal point for (; i < frac_digits; ++i) { @@ -350,7 +343,7 @@ serd_node_new_integer(int64_t i) uint64_t abs_i = (i < 0) ? -i : i; const unsigned digits = serd_digits((double)abs_i); char* buf = (char*)calloc(digits + 2, 1); - SerdNode node = {(const uint8_t*)buf, 0, 0, 0, SERD_LITERAL}; + SerdNode node = {(const uint8_t*)buf, 0, 0, SERD_LITERAL}; // Point s to the end char* s = buf + digits - 1; @@ -359,7 +352,7 @@ serd_node_new_integer(int64_t i) ++s; } - node.n_bytes = node.n_chars = (size_t)(s - buf) + 1u; + node.n_bytes = (size_t)(s - buf) + 1u; // Write integer part (right to left) do { @@ -374,12 +367,11 @@ serd_node_new_blob(const void* buf, size_t size, bool wrap_lines) { const size_t len = serd_base64_get_length(size, wrap_lines); uint8_t* str = (uint8_t*)calloc(len + 2, 1); - SerdNode node = {str, len, len, 0, SERD_LITERAL}; + SerdNode node = {str, len, 0, SERD_LITERAL}; if (serd_base64_encode(str, buf, size, wrap_lines)) { node.flags |= SERD_HAS_NEWLINE; } - return node; } diff --git a/src/reader.c b/src/reader.c index e21b6aa5..50da1979 100644 --- a/src/reader.c +++ b/src/reader.c @@ -45,7 +45,7 @@ set_blank_id(SerdReader* reader, Ref ref, size_t buf_size) { SerdNode* node = deref(reader, ref); const char* prefix = reader->bprefix ? (const char*)reader->bprefix : ""; - node->n_bytes = node->n_chars = (size_t)snprintf( + node->n_bytes = (size_t)snprintf( (char*)node->buf, buf_size, "%sb%u", prefix, reader->next_id++); } @@ -90,10 +90,10 @@ push_node_padded(SerdReader* reader, &reader->stack, sizeof(SerdNode) + maxlen + 1, sizeof(SerdNode)); SerdNode* const node = (SerdNode*)mem; - node->n_bytes = node->n_chars = n_bytes; - node->flags = 0; - node->type = type; - node->buf = NULL; + node->n_bytes = n_bytes; + node->flags = 0; + node->type = type; + node->buf = NULL; uint8_t* buf = (uint8_t*)(node + 1); memcpy(buf, str, n_bytes + 1); diff --git a/src/reader.h b/src/reader.h index 3a8991f4..e5ee2810 100644 --- a/src/reader.h +++ b/src/reader.h @@ -174,12 +174,7 @@ push_byte(SerdReader* reader, Ref ref, const int c) uint8_t* const s = (uint8_t*)serd_stack_push(&reader->stack, 1); SerdNode* const node = (SerdNode*)(reader->stack.buf + ref); - ++node->n_bytes; - if (!(c & 0x80)) { // Starts with 0 bit, start of new character - ++node->n_chars; - } - *(s - 1) = (uint8_t)c; *s = '\0'; return SERD_SUCCESS; diff --git a/src/string.c b/src/string.c index b83ad58f..defbaac4 100644 --- a/src/string.c +++ b/src/string.c @@ -18,9 +18,11 @@ #include "serd/serd.h" +#include <assert.h> #include <math.h> #include <stdint.h> #include <stdlib.h> +#include <string.h> void serd_free(void* ptr) @@ -74,46 +76,32 @@ serd_update_flags(const uint8_t c, SerdNodeFlags* const flags) size_t serd_substrlen(const uint8_t* const str, const size_t len, - size_t* const n_bytes, SerdNodeFlags* const flags) { - size_t n_chars = 0; - size_t i = 0; - SerdNodeFlags f = 0; + assert(flags); + + size_t i = 0; + *flags = 0; for (; i < len && str[i]; ++i) { - if ((str[i] & 0xC0) != 0x80) { // Start of new character - ++n_chars; - serd_update_flags(str[i], &f); - } - } - if (n_bytes) { - *n_bytes = i; + serd_update_flags(str[i], flags); } - if (flags) { - *flags = f; - } - return n_chars; + + return i; } size_t -serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags) +serd_strlen(const uint8_t* str, SerdNodeFlags* flags) { - size_t n_chars = 0; - size_t i = 0; - SerdNodeFlags f = 0; - for (; str[i]; ++i) { - if ((str[i] & 0xC0) != 0x80) { // Start of new character - ++n_chars; - serd_update_flags(str[i], &f); - } - } - if (n_bytes) { - *n_bytes = i; - } if (flags) { - *flags = f; + size_t i = 0; + *flags = 0; + for (; str[i]; ++i) { + serd_update_flags(str[i], flags); + } + return i; } - return n_chars; + + return strlen((const char*)str); } static inline double diff --git a/src/string_utils.h b/src/string_utils.h index ac2b40c5..60f677d0 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -97,10 +97,7 @@ is_windows_path(const uint8_t* path) } size_t -serd_substrlen(const uint8_t* str, - size_t len, - size_t* n_bytes, - SerdNodeFlags* flags); +serd_substrlen(const uint8_t* str, size_t len, SerdNodeFlags* flags); static inline char serd_to_upper(const char c) diff --git a/src/writer.c b/src/writer.c index ee9ae352..2c483c4e 100644 --- a/src/writer.c +++ b/src/writer.c @@ -44,9 +44,9 @@ typedef struct { SerdNode predicate; } WriteContext; -static const WriteContext WRITE_CONTEXT_NULL = {{0, 0, 0, 0, SERD_NOTHING}, - {0, 0, 0, 0, SERD_NOTHING}, - {0, 0, 0, 0, SERD_NOTHING}}; +static const WriteContext WRITE_CONTEXT_NULL = {{0, 0, 0, SERD_NOTHING}, + {0, 0, 0, SERD_NOTHING}, + {0, 0, 0, SERD_NOTHING}}; typedef enum { SEP_NONE, @@ -164,7 +164,6 @@ copy_node(SerdNode* dst, const SerdNode* src) if (src) { dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1); dst->n_bytes = src->n_bytes; - dst->n_chars = src->n_chars; dst->flags = src->flags; dst->type = src->type; memcpy((char*)dst->buf, src->buf, src->n_bytes + 1); diff --git a/test/test_node.c b/test/test_node.c index 9a2db562..d0302680 100644 --- a/test/test_node.c +++ b/test/test_node.c @@ -99,7 +99,7 @@ test_double_to_node(void) : ((const char*)node.buf == dbl_test_strs[i]); assert(pass); const size_t len = node.buf ? strlen((const char*)node.buf) : 0; - assert(node.n_bytes == len && node.n_chars == len); + assert(node.n_bytes == len); serd_node_free(&node); } } @@ -116,7 +116,7 @@ test_integer_to_node(void) SerdNode node = serd_node_new_integer(int_test_nums[i]); assert(!strcmp((const char*)node.buf, (const char*)int_test_strs[i])); const size_t len = strlen((const char*)node.buf); - assert(node.n_bytes == len && node.n_chars == len); + assert(node.n_bytes == len); serd_node_free(&node); } } @@ -132,7 +132,6 @@ test_blob_to_node(void) SerdNode blob = serd_node_new_blob(data, size, size % 5); - assert(blob.n_bytes == blob.n_chars); assert(blob.n_bytes == strlen((const char*)blob.buf)); size_t out_size = 0; @@ -172,8 +171,7 @@ test_node_from_string(void) SerdNode node = serd_node_from_string(SERD_LITERAL, (const uint8_t*)"hello\""); - assert(node.n_bytes == 6 && node.n_chars == 6 && - node.flags == SERD_HAS_QUOTE && + assert(node.n_bytes == 6 && node.flags == SERD_HAS_QUOTE && !strcmp((const char*)node.buf, "hello\"")); node = serd_node_from_string(SERD_URI, NULL); @@ -184,15 +182,14 @@ static void test_node_from_substring(void) { SerdNode empty = serd_node_from_substring(SERD_LITERAL, NULL, 32); - assert(!empty.buf && !empty.n_bytes && !empty.n_chars && !empty.flags && - !empty.type); + assert(!empty.buf && !empty.n_bytes && !empty.flags && !empty.type); SerdNode a_b = serd_node_from_substring(SERD_LITERAL, USTR("a\"bc"), 3); - assert(a_b.n_bytes == 3 && a_b.n_chars == 3 && a_b.flags == SERD_HAS_QUOTE && + assert(a_b.n_bytes == 3 && a_b.flags == SERD_HAS_QUOTE && !strncmp((const char*)a_b.buf, "a\"b", 3)); a_b = serd_node_from_substring(SERD_LITERAL, USTR("a\"bc"), 10); - assert(a_b.n_bytes == 4 && a_b.n_chars == 4 && a_b.flags == SERD_HAS_QUOTE && + assert(a_b.n_bytes == 4 && a_b.flags == SERD_HAS_QUOTE && !strncmp((const char*)a_b.buf, "a\"bc", 4)); } diff --git a/test/test_string.c b/test/test_string.c index cdb7a50d..27a488fb 100644 --- a/test/test_string.c +++ b/test/test_string.c @@ -28,15 +28,10 @@ test_strlen(void) { const uint8_t str[] = {'"', '5', 0xE2, 0x82, 0xAC, '"', '\n', 0}; - size_t n_bytes = 0; SerdNodeFlags flags = 0; - size_t len = serd_strlen(str, &n_bytes, &flags); - assert(len == 5 && n_bytes == 7 && - flags == (SERD_HAS_QUOTE | SERD_HAS_NEWLINE)); - len = serd_strlen(str, NULL, &flags); - assert(len == 5); - - assert(serd_strlen(str, &n_bytes, NULL) == 5); + size_t n_bytes = serd_strlen(str, &flags); + assert(n_bytes == 7 && flags == (SERD_HAS_QUOTE | SERD_HAS_NEWLINE)); + assert(serd_strlen(str, NULL) == 7); } static void |