From d7197f5240418e0bade07682d1b82e25ac176110 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Tue, 15 Mar 2016 23:37:09 -0400 Subject: Remove useless character counting --- NEWS | 1 + serd/serd.h | 8 +++----- src/env.c | 5 ++--- src/node.c | 34 ++++++++++++++-------------------- src/reader.c | 4 ++-- src/reader.h | 3 --- src/serd_internal.h | 5 +---- src/string.c | 45 +++++++++++++++------------------------------ src/writer.c | 7 +++---- tests/serd_test.c | 28 +++++++++------------------- 10 files changed, 50 insertions(+), 90 deletions(-) diff --git a/NEWS b/NEWS index 44bb1688..f5720041 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,7 @@ serd (1.0.0) unstable; * Use SerdBuffer for mutable buffers instead of abusing SerdChunk * Add serd_node_new_relative_uri() * Fix construction and comparison of URIs with UTF-8 characters + * Remove useless character counting * Report I/O errors with message and return appropriate status code * Report missing statement separator errors * Fix potential out of bounds read diff --git a/serd/serd.h b/serd/serd.h index a24f3a36..ddfc39f9 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -216,7 +216,6 @@ typedef uint32_t SerdNodeFlags; typedef struct { const uint8_t* buf; /**< Value string */ size_t n_bytes; /**< Size in bytes (not including null) */ - size_t n_chars; /**< Length in characters (not including null)*/ SerdNodeFlags flags; /**< Node flags (e.g. string properties) */ SerdType type; /**< Node type */ } SerdNode; @@ -306,14 +305,13 @@ serd_strerror(SerdStatus status); /** Measure a UTF-8 string. - @return Length of `str` in characters (except NULL). + @return Length of `str` in bytes (not including null). @param str A null-terminated UTF-8 string. - @param n_bytes (Output) Set to the size of `str` in bytes (except NULL). @param flags (Output) Set to the applicable flags. */ SERD_API size_t -serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags); +serd_strlen(const uint8_t* str, SerdNodeFlags* flags); /** Parse a string to a double. 
@@ -461,7 +459,7 @@ serd_uri_serialise_relative(const SerdURI* uri, @{ */ -static const SerdNode SERD_NODE_NULL = { NULL, 0, 0, 0, SERD_NOTHING }; +static const SerdNode SERD_NODE_NULL = { NULL, 0, 0, SERD_NOTHING }; /** Make a (shallow) node from `str`. diff --git a/src/env.c b/src/env.c index b26c6b20..582c1b44 100644 --- a/src/env.c +++ b/src/env.c @@ -213,9 +213,8 @@ serd_env_expand_node(const SerdEnv* env, } const size_t len = prefix.len + suffix.len; uint8_t* buf = (uint8_t*)malloc(len + 1); - SerdNode ret = { buf, len, 0, 0, SERD_URI }; - snprintf((char*)buf, len + 1, "%s%s", prefix.buf, suffix.buf); - ret.n_chars = serd_strlen(buf, NULL, NULL); + SerdNode ret = { buf, len, 0, SERD_URI }; + snprintf((char*)buf, ret.n_bytes + 1, "%s%s", prefix.buf, suffix.buf); return ret; } case SERD_URI: { diff --git a/src/node.c b/src/node.c index 22555493..a2476550 100644 --- a/src/node.c +++ b/src/node.c @@ -37,10 +37,9 @@ serd_node_from_string(SerdType type, const uint8_t* str) return SERD_NODE_NULL; } - uint32_t flags = 0; - size_t buf_n_bytes = 0; - const size_t buf_n_chars = serd_strlen(str, &buf_n_bytes, &flags); - SerdNode ret = { str, buf_n_bytes, buf_n_chars, flags, type }; + uint32_t flags = 0; + const size_t n_bytes = serd_strlen(str, &flags); + const SerdNode ret = { str, n_bytes, flags, type }; return ret; } @@ -51,11 +50,9 @@ serd_node_from_substring(SerdType type, const uint8_t* str, const size_t len) return SERD_NODE_NULL; } - uint32_t flags = 0; - size_t buf_n_bytes = 0; - const size_t buf_n_chars = serd_substrlen(str, len, &buf_n_bytes, &flags); - assert(buf_n_bytes <= len); - SerdNode ret = { str, buf_n_bytes, buf_n_chars, flags, type }; + uint32_t flags = 0; + const size_t n_bytes = serd_substrlen(str, len, &flags); + const SerdNode ret = { str, n_bytes, flags, type }; return ret; } @@ -79,7 +76,6 @@ serd_node_equals(const SerdNode* a, const SerdNode* b) return (a == b) || (a->type == b->type && a->n_bytes == b->n_bytes - && a->n_chars == b->n_chars && 
((a->buf == b->buf) || !memcmp((const char*)a->buf, (const char*)b->buf, a->n_bytes + 1))); @@ -209,13 +205,12 @@ serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out) const size_t len = serd_uri_string_length(&abs_uri); uint8_t* buf = (uint8_t*)malloc(len + 1); - SerdNode node = { buf, 0, 0, 0, SERD_URI }; + SerdNode node = { buf, len, 0, SERD_URI }; uint8_t* ptr = buf; const size_t actual_len = serd_uri_serialise(&abs_uri, string_sink, &ptr); buf[actual_len] = '\0'; node.n_bytes = actual_len; - node.n_chars = serd_strlen(buf, NULL, NULL); if (out) { serd_uri_parse(buf, out); // TODO: cleverly avoid double parse @@ -233,14 +228,13 @@ serd_node_new_relative_uri(const SerdURI* uri, const size_t uri_len = serd_uri_string_length(uri); const size_t base_len = serd_uri_string_length(base); uint8_t* buf = (uint8_t*)malloc(uri_len + base_len + 1); - SerdNode node = { buf, 0, 0, 0, SERD_URI }; + SerdNode node = { buf, 0, 0, SERD_URI }; uint8_t* ptr = buf; const size_t actual_len = serd_uri_serialise_relative( uri, base, root, string_sink, &ptr); buf[actual_len] = '\0'; node.n_bytes = actual_len; - node.n_chars = serd_strlen(buf, NULL, NULL); if (out) { serd_uri_parse(buf, out); // TODO: cleverly avoid double parse @@ -266,7 +260,7 @@ serd_node_new_decimal(double d, unsigned frac_digits) const double abs_d = fabs(d); const unsigned int_digits = serd_digits(abs_d); char* buf = (char*)calloc(int_digits + frac_digits + 3, 1); - SerdNode node = { (const uint8_t*)buf, 0, 0, 0, SERD_LITERAL }; + SerdNode node = { (const uint8_t*)buf, 0, 0, SERD_LITERAL }; const double int_part = floor(abs_d); // Point s to decimal point location @@ -289,7 +283,7 @@ serd_node_new_decimal(double d, unsigned frac_digits) double frac_part = fabs(d - int_part); if (frac_part < DBL_EPSILON) { *s++ = '0'; - node.n_bytes = node.n_chars = (s - buf); + node.n_bytes = (s - buf); } else { uint64_t frac = lround(frac_part * pow(10.0, (int)frac_digits)); s += frac_digits - 1; @@ -298,7 
+292,7 @@ serd_node_new_decimal(double d, unsigned frac_digits) // Skip trailing zeros for (; i < frac_digits - 1 && !(frac % 10); ++i, --s, frac /= 10) {} - node.n_bytes = node.n_chars = (s - buf) + 1; + node.n_bytes = (s - buf) + 1; // Write digits from last trailing zero to decimal point for (; i < frac_digits; ++i) { @@ -316,7 +310,7 @@ serd_node_new_integer(int64_t i) int64_t abs_i = (i < 0) ? -i : i; const unsigned digits = serd_digits(abs_i); char* buf = (char*)calloc(digits + 2, 1); - SerdNode node = { (const uint8_t*)buf, 0, 0, 0, SERD_LITERAL }; + SerdNode node = { (const uint8_t*)buf, 0, 0, SERD_LITERAL }; // Point s to the end char* s = buf + digits - 1; @@ -325,7 +319,7 @@ serd_node_new_integer(int64_t i) ++s; } - node.n_bytes = node.n_chars = (s - buf) + 1; + node.n_bytes = (s - buf) + 1; // Write integer part (right to left) do { @@ -361,7 +355,7 @@ serd_node_new_blob(const void* buf, size_t size, bool wrap_lines) { const size_t len = (size + 2) / 3 * 4 + (wrap_lines * ((size - 1) / 57)); uint8_t* str = (uint8_t*)calloc(len + 2, 1); - SerdNode node = { str, len, len, 0, SERD_LITERAL }; + SerdNode node = { str, len, 0, SERD_LITERAL }; for (size_t i = 0, j = 0; i < size; i += 3, j += 4) { uint8_t in[4] = { 0, 0, 0, 0 }; size_t n_in = MIN(3, size - i); diff --git a/src/reader.c b/src/reader.c index 1a7f58d4..1449814e 100644 --- a/src/reader.c +++ b/src/reader.c @@ -41,7 +41,7 @@ set_blank_id(SerdReader* reader, Ref ref, size_t buf_size) { SerdNode* node = deref(reader, ref); const char* prefix = reader->bprefix ? 
(const char*)reader->bprefix : ""; - node->n_bytes = node->n_chars = snprintf( + node->n_bytes = snprintf( (char*)node->buf, buf_size, "%sb%u", prefix, reader->next_id++); } @@ -83,7 +83,7 @@ push_node_padded(SerdReader* reader, size_t maxlen, &reader->stack, sizeof(SerdNode) + maxlen + 1, sizeof(SerdNode)); SerdNode* const node = (SerdNode*)mem; - node->n_bytes = node->n_chars = n_bytes; + node->n_bytes = n_bytes; node->flags = 0; node->type = type; node->buf = NULL; diff --git a/src/reader.h b/src/reader.h index f6cbc7ad..117dd462 100644 --- a/src/reader.h +++ b/src/reader.h @@ -71,9 +71,6 @@ push_byte(SerdReader* reader, Ref ref, const uint8_t c) uint8_t* const s = serd_stack_push(&reader->stack, 1); SerdNode* const node = (SerdNode*)(reader->stack.buf + ref); ++node->n_bytes; - if (!(c & 0x80)) { // Starts with 0 bit, start of new character - ++node->n_chars; - } *(s - 1) = c; *s = '\0'; return SERD_SUCCESS; diff --git a/src/serd_internal.h b/src/serd_internal.h index dfb369b3..9cce9d14 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -381,10 +381,7 @@ is_windows_path(const uint8_t* path) /* String utilities */ size_t -serd_substrlen(const uint8_t* str, - size_t len, - size_t* n_bytes, - SerdNodeFlags* flags); +serd_substrlen(const uint8_t* str, size_t len, SerdNodeFlags* flags); static inline int serd_strncasecmp(const char* s1, const char* s2, size_t n) diff --git a/src/string.c b/src/string.c index cde82134..024f8bd0 100644 --- a/src/string.c +++ b/src/string.c @@ -56,46 +56,33 @@ serd_update_flags(const uint8_t c, SerdNodeFlags* const flags) size_t serd_substrlen(const uint8_t* const str, const size_t len, - size_t* const n_bytes, SerdNodeFlags* const flags) { - size_t n_chars = 0; - size_t i = 0; - SerdNodeFlags f = 0; - for (; i < len && str[i]; ++i) { - if ((str[i] & 0xC0) != 0x80) { // Start of new character - ++n_chars; - serd_update_flags(str[i], &f); - } - } - if (n_bytes) { - *n_bytes = i; - } if (flags) { - *flags = f; + size_t i = 0; 
+ *flags = 0; + for (; i < len && str[i]; ++i) { + serd_update_flags(str[i], flags); + } + return i; } - return n_chars; + /* No flags wanted: still stop at len, since str need not be terminated within it */ + const uint8_t* const end = (const uint8_t*)memchr(str, '\0', len); + return end ? (size_t)(end - str) : len; } size_t -serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags) +serd_strlen(const uint8_t* str, SerdNodeFlags* flags) { - size_t n_chars = 0; - size_t i = 0; - SerdNodeFlags f = 0; - for (; str[i]; ++i) { - if ((str[i] & 0xC0) != 0x80) { // Start of new character - ++n_chars; - serd_update_flags(str[i], &f); - } - } - if (n_bytes) { - *n_bytes = i; - } if (flags) { - *flags = f; + size_t i = 0; + *flags = 0; + for (; str[i]; ++i) { + serd_update_flags(str[i], flags); + } + return i; } - return n_chars; + return strlen((const char*)str); } static inline double diff --git a/src/writer.c b/src/writer.c index e9f928b8..1b6d0829 100644 --- a/src/writer.c +++ b/src/writer.c @@ -28,9 +28,9 @@ typedef struct { } WriteContext; static const WriteContext WRITE_CONTEXT_NULL = { - { 0, 0, 0, 0, SERD_NOTHING }, - { 0, 0, 0, 0, SERD_NOTHING }, - { 0, 0, 0, 0, SERD_NOTHING } + { 0, 0, 0, SERD_NOTHING }, + { 0, 0, 0, SERD_NOTHING }, + { 0, 0, 0, SERD_NOTHING } }; typedef enum { @@ -148,7 +148,6 @@ copy_node(SerdNode* dst, const SerdNode* src) if (src) { dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1); dst->n_bytes = src->n_bytes; - dst->n_chars = src->n_chars; dst->flags = src->flags; dst->type = src->type; memcpy((char*)dst->buf, src->buf, src->n_bytes + 1); diff --git a/tests/serd_test.c b/tests/serd_test.c index 12899de2..23e1401a 100644 --- a/tests/serd_test.c +++ b/tests/serd_test.c @@ -159,7 +159,7 @@ main(void) : ((const char*)node.buf == dbl_test_strs[i]); assert(pass); const size_t len = node.buf ? 
strlen((const char*)node.buf) : 0; - assert(node.n_bytes == len && node.n_chars == len); + assert(node.n_bytes == len); serd_node_free(&node); } @@ -177,7 +177,7 @@ main(void) SerdNode node = serd_node_new_integer(int_test_nums[i]); assert(!strcmp((const char*)node.buf, (const char*)int_test_strs[i])); const size_t len = strlen((const char*)node.buf); - assert(node.n_bytes == len && node.n_chars == len); + assert(node.n_bytes == len); serd_node_free(&node); } @@ -190,7 +190,6 @@ main(void) SerdNode blob = serd_node_new_blob(data, size, size % 5); - assert(blob.n_bytes == blob.n_chars); assert(blob.n_bytes == strlen((const char*)blob.buf)); size_t out_size; @@ -211,15 +210,10 @@ main(void) const uint8_t str[] = { '"', '5', 0xE2, 0x82, 0xAC, '"', '\n', 0 }; - size_t n_bytes; SerdNodeFlags flags; - size_t len = serd_strlen(str, &n_bytes, &flags); - assert(len == 5 && n_bytes == 7 && - flags == (SERD_HAS_QUOTE | SERD_HAS_NEWLINE)); - len = serd_strlen(str, NULL, &flags); - assert(len == 5); - - assert(serd_strlen(str, &n_bytes, NULL) == 5); + size_t n_bytes = serd_strlen(str, &flags); + assert(n_bytes == 7 && flags == (SERD_HAS_QUOTE|SERD_HAS_NEWLINE)); + assert(serd_strlen(str, NULL) == 7); // Test serd_strerror @@ -298,8 +292,7 @@ main(void) // Test serd_node_from_string SerdNode node = serd_node_from_string(SERD_LITERAL, (const uint8_t*)"hello\""); - assert(node.n_bytes == 6 && node.n_chars == 6 && - node.flags == SERD_HAS_QUOTE && + assert(node.n_bytes == 6 && node.flags == SERD_HAS_QUOTE && !strcmp((const char*)node.buf, "hello\"")); node = serd_node_from_string(SERD_URI, NULL); @@ -308,17 +301,14 @@ main(void) // Test serd_node_from_substring SerdNode empty = serd_node_from_substring(SERD_LITERAL, NULL, 32); - assert(!empty.buf && !empty.n_bytes && !empty.n_chars && !empty.flags && - !empty.type); + assert(!empty.buf && !empty.n_bytes && !empty.flags && !empty.type); SerdNode a_b = serd_node_from_substring(SERD_LITERAL, USTR("a\"bc"), 3); - assert(a_b.n_bytes == 
3 && a_b.n_chars == 3 && - a_b.flags == SERD_HAS_QUOTE && + assert(a_b.n_bytes == 3 && a_b.flags == SERD_HAS_QUOTE && !strncmp((const char*)a_b.buf, "a\"b", 3)); a_b = serd_node_from_substring(SERD_LITERAL, USTR("a\"bc"), 10); - assert(a_b.n_bytes == 4 && a_b.n_chars == 4 && - a_b.flags == SERD_HAS_QUOTE && + assert(a_b.n_bytes == 4 && a_b.flags == SERD_HAS_QUOTE && !strncmp((const char*)a_b.buf, "a\"bc", 4)); // Test serd_node_new_uri_from_string -- cgit v1.2.1