diff options
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | serd/serd.h | 8 | ||||
-rw-r--r-- | src/env.c | 5 | ||||
-rw-r--r-- | src/node.c | 34 | ||||
-rw-r--r-- | src/reader.c | 4 | ||||
-rw-r--r-- | src/reader.h | 3 | ||||
-rw-r--r-- | src/serd_internal.h | 1 | ||||
-rw-r--r-- | src/string.c | 45 | ||||
-rw-r--r-- | src/writer.c | 7 | ||||
-rw-r--r-- | tests/serd_test.c | 49 |
10 files changed, 58 insertions, 99 deletions
@@ -3,6 +3,7 @@ serd (1.0.0) unstable; * Use SerdBuffer for mutable buffers instead of abusing SerdChunk * Add serd_node_new_relative_uri() * Fix construction and comparison of URIs with UTF-8 characters + * Remove useless character counting * Report I/O errors with message and return appropriate status code * Report missing statement separator errors * Fix potential out of bounds read diff --git a/serd/serd.h b/serd/serd.h index 49ee3c3a..d289ee72 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -217,7 +217,6 @@ typedef uint32_t SerdNodeFlags; typedef struct { const uint8_t* buf; /**< Value string */ size_t n_bytes; /**< Size in bytes (not including null) */ - size_t n_chars; /**< Length in characters (not including null)*/ SerdNodeFlags flags; /**< Node flags (e.g. string properties) */ SerdType type; /**< Node type */ } SerdNode; @@ -296,14 +295,13 @@ serd_strerror(SerdStatus status); /** Measure a UTF-8 string. - @return Length of `str` in characters (except NULL). + @return Length of `str` in bytes. @param str A null-terminated UTF-8 string. - @param n_bytes (Output) Set to the size of `str` in bytes (except NULL). @param flags (Output) Set to the applicable flags. */ SERD_API size_t -serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags); +serd_strlen(const uint8_t* str, SerdNodeFlags* flags); /** Parse a string to a double. @@ -445,7 +443,7 @@ serd_uri_serialise_relative(const SerdURI* uri, @{ */ -static const SerdNode SERD_NODE_NULL = { NULL, 0, 0, 0, SERD_NOTHING }; +static const SerdNode SERD_NODE_NULL = { NULL, 0, 0, SERD_NOTHING }; /** Make a (shallow) node from `str`. @@ -245,9 +245,8 @@ serd_env_expand_node(const SerdEnv* env, } const size_t len = prefix.len + suffix.len; uint8_t* buf = (uint8_t*)malloc(len + 1); - SerdNode ret = { buf, len, 0, 0, SERD_URI }; - snprintf((char*)buf, len + 1, "%s%s", prefix.buf, suffix.buf); - ret.n_chars = serd_strlen(buf, NULL, NULL); + SerdNode ret = { buf, len, 0, SERD_URI }; + snprintf((char*)buf, ret.n_bytes + 1, "%s%s", prefix.buf, suffix.buf); return ret; } case SERD_URI: { @@ -39,10 +39,9 @@ serd_node_from_string(SerdType type, const uint8_t* str) return SERD_NODE_NULL; } - uint32_t flags = 0; - size_t buf_n_bytes = 0; - const size_t buf_n_chars = serd_strlen(str, &buf_n_bytes, &flags); - SerdNode ret = { str, buf_n_bytes, buf_n_chars, flags, type }; + uint32_t flags = 0; + const size_t n_bytes = serd_strlen(str, &flags); + const SerdNode ret = { str, n_bytes, flags, type }; return ret; } @@ -54,11 +53,9 @@ serd_node_from_substring(SerdType type, const uint8_t* str, const size_t len) return SERD_NODE_NULL; } - uint32_t flags = 0; - size_t buf_n_bytes = 0; - const size_t buf_n_chars = serd_substrlen(str, len, &buf_n_bytes, &flags); - assert(buf_n_bytes <= len); - SerdNode ret = { str, buf_n_bytes, buf_n_chars, flags, type }; + uint32_t flags = 0; + const size_t n_bytes = serd_substrlen(str, len, &flags); + const SerdNode ret = { str, n_bytes, flags, type }; return ret; } @@ -84,7 +81,6 @@ serd_node_equals(const SerdNode* a, const SerdNode* b) return (a == b) || (a->type == b->type && a->n_bytes == b->n_bytes - && a->n_chars == b->n_chars && ((a->buf == b->buf) || !memcmp((const char*)a->buf, (const char*)b->buf, a->n_bytes + 1))); @@ -218,13 +214,12 @@ serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out) const size_t len = serd_uri_string_length(&abs_uri); uint8_t* buf = (uint8_t*)malloc(len + 1); - SerdNode node = { buf, 0, 0, 0, SERD_URI }; + SerdNode node = { buf, len, 0, SERD_URI }; uint8_t* ptr = buf; const size_t actual_len = serd_uri_serialise(&abs_uri, string_sink, &ptr); buf[actual_len] = '\0'; node.n_bytes = actual_len; - node.n_chars = serd_strlen(buf, NULL, NULL); if (out) { serd_uri_parse(buf, out); // TODO: cleverly avoid double parse @@ -243,14 +238,13 @@ serd_node_new_relative_uri(const SerdURI* uri, const size_t uri_len = serd_uri_string_length(uri); const size_t base_len = serd_uri_string_length(base); uint8_t* buf = (uint8_t*)malloc(uri_len + base_len + 1); - SerdNode node = { buf, 0, 0, 0, SERD_URI }; + SerdNode node = { buf, 0, 0, SERD_URI }; uint8_t* ptr = buf; const size_t actual_len = serd_uri_serialise_relative( uri, base, root, string_sink, &ptr); buf[actual_len] = '\0'; node.n_bytes = actual_len; - node.n_chars = serd_strlen(buf, NULL, NULL); if (out) { serd_uri_parse(buf, out); // TODO: cleverly avoid double parse @@ -277,7 +271,7 @@ serd_node_new_decimal(double d, unsigned frac_digits) const double abs_d = fabs(d); const unsigned int_digits = serd_digits(abs_d); char* buf = (char*)calloc(int_digits + frac_digits + 3, 1); - SerdNode node = { (const uint8_t*)buf, 0, 0, 0, SERD_LITERAL }; + SerdNode node = { (const uint8_t*)buf, 0, 0, SERD_LITERAL }; const double int_part = floor(abs_d); // Point s to decimal point location @@ -300,7 +294,7 @@ serd_node_new_decimal(double d, unsigned frac_digits) double frac_part = fabs(d - int_part); if (frac_part < DBL_EPSILON) { *s++ = '0'; - node.n_bytes = node.n_chars = (s - buf); + node.n_bytes = (s - buf); } else { uint64_t frac = frac_part * pow(10.0, (int)frac_digits) + 0.5; s += frac_digits - 1; @@ -309,7 +303,7 @@ serd_node_new_decimal(double d, unsigned frac_digits) // Skip trailing zeros for (; i < frac_digits - 1 && !(frac % 10); ++i, --s, frac /= 10) {} - node.n_bytes = node.n_chars = (s - buf) + 1; + node.n_bytes = (s - buf) + 1; // Write digits from last trailing zero to decimal point for (; i < frac_digits; ++i) { @@ -328,7 +322,7 @@ serd_node_new_integer(int64_t i) int64_t abs_i = (i < 0) ? -i : i; const unsigned digits = serd_digits(abs_i); char* buf = (char*)calloc(digits + 2, 1); - SerdNode node = { (const uint8_t*)buf, 0, 0, 0, SERD_LITERAL }; + SerdNode node = { (const uint8_t*)buf, 0, 0, SERD_LITERAL }; // Point s to the end char* s = buf + digits - 1; @@ -337,7 +331,7 @@ serd_node_new_integer(int64_t i) ++s; } - node.n_bytes = node.n_chars = (s - buf) + 1; + node.n_bytes = (s - buf) + 1; // Write integer part (right to left) do { @@ -374,7 +368,7 @@ serd_node_new_blob(const void* buf, size_t size, bool wrap_lines) { const size_t len = ((size + 2) / 3) * 4 + (wrap_lines ? (size / 57) : 0); uint8_t* str = (uint8_t*)calloc(1, len + 2); - SerdNode node = { str, len, len, 0, SERD_LITERAL }; + SerdNode node = { str, len, 0, SERD_LITERAL }; for (size_t i = 0, j = 0; i < size; i += 3, j += 4) { uint8_t in[4] = { 0, 0, 0, 0 }; size_t n_in = MIN(3, size - i); diff --git a/src/reader.c b/src/reader.c index d515aaab..4266e128 100644 --- a/src/reader.c +++ b/src/reader.c @@ -41,7 +41,7 @@ set_blank_id(SerdReader* reader, Ref ref, size_t buf_size) { SerdNode* node = deref(reader, ref); const char* prefix = reader->bprefix ? (const char*)reader->bprefix : ""; - node->n_bytes = node->n_chars = snprintf( + node->n_bytes = snprintf( (char*)node->buf, buf_size, "%sb%u", prefix, reader->next_id++); } @@ -80,7 +80,7 @@ push_node_padded(SerdReader* reader, size_t maxlen, &reader->stack, sizeof(SerdNode) + maxlen + 1, sizeof(SerdNode)); SerdNode* const node = (SerdNode*)mem; - node->n_bytes = node->n_chars = n_bytes; + node->n_bytes = n_bytes; node->flags = 0; node->type = type; node->buf = NULL; diff --git a/src/reader.h b/src/reader.h index 723a1c5a..7dc981af 100644 --- a/src/reader.h +++ b/src/reader.h @@ -69,9 +69,6 @@ push_byte(SerdReader* reader, Ref ref, const uint8_t c) uint8_t* const s = serd_stack_push(&reader->stack, 1); SerdNode* const node = (SerdNode*)(reader->stack.buf + ref); ++node->n_bytes; - if (!(c & 0x80)) { // Starts with 0 bit, start of new character - ++node->n_chars; - } *(s - 1) = c; *s = '\0'; return SERD_SUCCESS; diff --git a/src/serd_internal.h b/src/serd_internal.h index cfcdc82c..46b88c83 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -353,7 +353,6 @@ is_windows_path(const uint8_t* path) size_t serd_substrlen(const uint8_t* str, const size_t len, - size_t* n_bytes, SerdNodeFlags* flags); static inline int diff --git a/src/string.c b/src/string.c index dedd0713..ee2ac290 100644 --- a/src/string.c +++ b/src/string.c @@ -51,47 +51,32 @@ serd_update_flags(const uint8_t c, SerdNodeFlags* const flags) size_t serd_substrlen(const uint8_t* const str, const size_t len, - size_t* const n_bytes, SerdNodeFlags* const flags) { - size_t n_chars = 0; - size_t i = 0; - SerdNodeFlags f = 0; - for (; i < len && str[i]; ++i) { - if ((str[i] & 0xC0) != 0x80) { // Start of new character - ++n_chars; - serd_update_flags(str[i], &f); - } - } - if (n_bytes) { - *n_bytes = i; - } if (flags) { - *flags = f; + size_t i = 0; + *flags = 0; + for (; i < len && str[i]; ++i) { + serd_update_flags(str[i], flags); + } + return i; } - return n_chars; + return strlen((const char*)str); } SERD_API size_t -serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags) +serd_strlen(const uint8_t* str, SerdNodeFlags* flags) { - size_t n_chars = 0; - size_t i = 0; - SerdNodeFlags f = 0; - for (; str[i]; ++i) { - if ((str[i] & 0xC0) != 0x80) { // Start of new character - ++n_chars; - serd_update_flags(str[i], &f); - } - } - if (n_bytes) { - *n_bytes = i; - } if (flags) { - *flags = f; + size_t i = 0; + *flags = 0; + for (; str[i]; ++i) { + serd_update_flags(str[i], flags); + } + return i; } - return n_chars; + return strlen((const char*)str); } static inline double diff --git a/src/writer.c b/src/writer.c index 26bf5e5a..cecaacf5 100644 --- a/src/writer.c +++ b/src/writer.c @@ -28,9 +28,9 @@ typedef struct { } WriteContext; static const WriteContext WRITE_CONTEXT_NULL = { - { 0, 0, 0, 0, SERD_NOTHING }, - { 0, 0, 0, 0, SERD_NOTHING }, - { 0, 0, 0, 0, SERD_NOTHING } + { 0, 0, 0, SERD_NOTHING }, + { 0, 0, 0, SERD_NOTHING }, + { 0, 0, 0, SERD_NOTHING } }; typedef enum { @@ -148,7 +148,6 @@ copy_node(SerdNode* dst, const SerdNode* src) if (src) { dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1); dst->n_bytes = src->n_bytes; - dst->n_chars = src->n_chars; dst->flags = src->flags; dst->type = src->type; memcpy((char*)dst->buf, src->buf, src->n_bytes + 1); diff --git a/tests/serd_test.c b/tests/serd_test.c index 62621e16..71801c1b 100644 --- a/tests/serd_test.c +++ b/tests/serd_test.c @@ -163,9 +163,9 @@ main(void) node.buf, dbl_test_strs[i]); } const size_t len = node.buf ? strlen((const char*)node.buf) : 0; - if (node.n_bytes != len || node.n_chars != len) { - FAILF("Length %zu,%zu != %zu\n", - node.n_bytes, node.n_chars, len); + if (node.n_bytes != len) { + FAILF("Length %zu != %zu\n", + node.n_bytes, len); } serd_node_free(&node); } @@ -187,9 +187,9 @@ main(void) node.buf, int_test_strs[i]); } const size_t len = strlen((const char*)node.buf); - if (node.n_bytes != len || node.n_chars != len) { + if (node.n_bytes != len) { FAILF("Length %zu,%zu != %zu\n", - node.n_bytes, node.n_chars, len); + node.n_bytes, len); } serd_node_free(&node); } @@ -202,12 +202,6 @@ main(void) } SerdNode blob = serd_node_new_blob(data, size, size % 5); - - if (blob.n_bytes != blob.n_chars) { - FAILF("Blob %zu bytes != %zu chars\n", - blob.n_bytes, blob.n_chars); - } - size_t out_size; uint8_t* out = (uint8_t*)serd_base64_decode( blob.buf, blob.n_bytes, &out_size); @@ -230,18 +224,11 @@ main(void) const uint8_t str[] = { '"', '5', 0xE2, 0x82, 0xAC, '"', '\n', 0 }; - size_t n_bytes; SerdNodeFlags flags; - size_t len = serd_strlen(str, &n_bytes, &flags); - if (len != 5 || n_bytes != 7 - || flags != (SERD_HAS_QUOTE|SERD_HAS_NEWLINE)) { - FAILF("Bad serd_strlen(%s) len=%zu n_bytes=%zu flags=%u\n", - str, len, n_bytes, flags); - } - len = serd_strlen(str, NULL, &flags); - if (len != 5) { - FAILF("Bad serd_strlen(%s) len=%zu flags=%u\n", - str, len, flags); + size_t n_bytes = serd_strlen(str, &flags); + if (n_bytes != 7 || flags != (SERD_HAS_QUOTE|SERD_HAS_NEWLINE)) { + FAILF("Bad serd_strlen(%s) n_bytes=%zu flags=%u\n", + str, n_bytes, flags); } // Test serd_strerror @@ -346,10 +333,10 @@ main(void) // Test serd_node_from_string SerdNode node = serd_node_from_string(SERD_LITERAL, (const uint8_t*)"hello\""); - if (node.n_bytes != 6 || node.n_chars != 6 || node.flags != SERD_HAS_QUOTE + if (node.n_bytes != 6 || node.flags != SERD_HAS_QUOTE || strcmp((const char*)node.buf, "hello\"")) { - FAILF("Bad node %s %zu %zu %d %d\n", - node.buf, node.n_bytes, node.n_chars, node.flags, node.type); + FAILF("Bad node %s %zu %d %d\n", + node.buf, node.n_bytes, node.flags, node.type); } node = serd_node_from_string(SERD_URI, NULL); @@ -360,22 +347,22 @@ main(void) // Test serd_node_from_substring SerdNode empty = serd_node_from_substring(SERD_LITERAL, NULL, 32); - if (empty.buf || empty.n_bytes || empty.n_chars || empty.flags || empty.type) { + if (empty.buf || empty.n_bytes || empty.flags || empty.type) { FAIL("Successfully created node from NULL substring\n"); } SerdNode a_b = serd_node_from_substring(SERD_LITERAL, USTR("a\"bc"), 3); - if (a_b.n_bytes != 3 || a_b.n_chars != 3 || a_b.flags != SERD_HAS_QUOTE + if (a_b.n_bytes != 3 || a_b.flags != SERD_HAS_QUOTE || strncmp((const char*)a_b.buf, "a\"b", 3)) { - FAILF("Bad node %s %zu %zu %d %d\n", - a_b.buf, a_b.n_bytes, a_b.n_chars, a_b.flags, a_b.type); + FAILF("Bad node %s %zu %d %d\n", + a_b.buf, a_b.n_bytes, a_b.flags, a_b.type); } a_b = serd_node_from_substring(SERD_LITERAL, USTR("a\"bc"), 10); - if (a_b.n_bytes != 4 || a_b.n_chars != 4 || a_b.flags != SERD_HAS_QUOTE + if (a_b.n_bytes != 4 || a_b.flags != SERD_HAS_QUOTE || strncmp((const char*)a_b.buf, "a\"bc", 4)) { FAILF("Bad node %s %zu %zu %d %d\n", - a_b.buf, a_b.n_bytes, a_b.n_chars, a_b.flags, a_b.type); + a_b.buf, a_b.n_bytes, a_b.flags, a_b.type); } // Test serd_node_new_uri_from_string |