From 9e5fa742b6ef66a546c0a77c14f834f2268c5f71 Mon Sep 17 00:00:00 2001
From: David Robillard <d@drobilla.net>
Date: Tue, 15 Mar 2016 23:37:09 -0400
Subject: Remove useless character counting

---
 NEWS               |  1 +
 serd/serd.h        |  8 +++-----
 src/env.c          |  5 ++---
 src/node.c         | 35 ++++++++++++++---------------------
 src/reader.c       |  4 ++--
 src/reader.h       |  3 ---
 src/string.c       | 48 ++++++++++++++++++------------------------------
 src/string_utils.h |  5 +----
 src/writer.c       |  7 +++----
 tests/serd_test.c  | 28 +++++++++-------------------
 10 files changed, 53 insertions(+), 91 deletions(-)

diff --git a/NEWS b/NEWS
index 6004d0a5..4102a408 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,7 @@
 serd (1.0.1) unstable;
 
   * Add SerdBuffer for mutable buffers to keep SerdChunk const-correct
+  * Remove useless character counting from API
 
  -- David Robillard <d@drobilla.net>  Sat, 19 Jan 2019 12:31:12 +0000
 
diff --git a/serd/serd.h b/serd/serd.h
index 08a5e151..ca133b96 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -220,7 +220,6 @@ typedef uint32_t SerdNodeFlags;
 typedef struct {
 	const uint8_t* buf;      /**< Value string */
 	size_t         n_bytes;  /**< Size in bytes (not including null) */
-	size_t         n_chars;  /**< Length in characters (not including null)*/
 	SerdNodeFlags  flags;    /**< Node flags (e.g. string properties) */
 	SerdType       type;     /**< Node type */
 } SerdNode;
@@ -310,14 +309,13 @@ serd_strerror(SerdStatus status);
 
 /**
    Measure a UTF-8 string.
-   @return Length of `str` in characters (except NULL).
+   @return Length of `str` in bytes (not including the null terminator).
    @param str A null-terminated UTF-8 string.
-   @param n_bytes (Output) Set to the size of `str` in bytes (except NULL).
    @param flags (Output) Set to the applicable flags.
 */
 SERD_API
 size_t
-serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags);
+serd_strlen(const uint8_t* str, SerdNodeFlags* flags);
 
 /**
    Parse a string to a double.
@@ -465,7 +463,7 @@ serd_uri_serialise_relative(const SerdURI* uri,
    @{
 */
 
-static const SerdNode SERD_NODE_NULL = { NULL, 0, 0, 0, SERD_NOTHING };
+static const SerdNode SERD_NODE_NULL = { NULL, 0, 0, SERD_NOTHING };
 
 /**
    Make a (shallow) node from `str`.
diff --git a/src/env.c b/src/env.c
index e67b42c0..5c260f30 100644
--- a/src/env.c
+++ b/src/env.c
@@ -229,9 +229,8 @@ serd_env_expand_node(const SerdEnv*  env,
 		}
 		const size_t len = prefix.len + suffix.len;
 		uint8_t*     buf = (uint8_t*)malloc(len + 1);
-		SerdNode     ret = { buf, len, 0, 0, SERD_URI };
-		snprintf((char*)buf, len + 1, "%s%s", prefix.buf, suffix.buf);
-		ret.n_chars = serd_strlen(buf, NULL, NULL);
+		SerdNode     ret = { buf, len, 0, SERD_URI };
+		snprintf((char*)buf, ret.n_bytes + 1, "%s%s", prefix.buf, suffix.buf);
 		return ret;
 	}
 	case SERD_BLANK:
diff --git a/src/node.c b/src/node.c
index 8b1ad2c3..dac03b55 100644
--- a/src/node.c
+++ b/src/node.c
@@ -21,7 +21,6 @@
 
 #include "serd/serd.h"
 
-#include <assert.h>
 #include <float.h>
 #include <math.h>
 #include <stdbool.h>
@@ -46,10 +45,9 @@ serd_node_from_string(SerdType type, const uint8_t* str)
 		return SERD_NODE_NULL;
 	}
 
-	SerdNodeFlags flags       = 0;
-	size_t        buf_n_bytes = 0;
-	const size_t  buf_n_chars = serd_strlen(str, &buf_n_bytes, &flags);
-	SerdNode ret = {str, buf_n_bytes, buf_n_chars, flags, type};
+	SerdNodeFlags  flags   = 0;
+	const size_t   n_bytes = serd_strlen(str, &flags);
+	const SerdNode ret     = {str, n_bytes, flags, type};
 	return ret;
 }
 
@@ -60,11 +58,9 @@ serd_node_from_substring(SerdType type, const uint8_t* str, const size_t len)
 		return SERD_NODE_NULL;
 	}
 
-	SerdNodeFlags flags       = 0;
-	size_t        buf_n_bytes = 0;
-	const size_t  buf_n_chars = serd_substrlen(str, len, &buf_n_bytes, &flags);
-	assert(buf_n_bytes <= len);
-	SerdNode ret = { str, buf_n_bytes, buf_n_chars, flags, type };
+	SerdNodeFlags  flags   = 0;
+	const size_t   n_bytes = serd_substrlen(str, len, &flags);
+	const SerdNode ret     = {str, n_bytes, flags, type};
 	return ret;
 }
 
@@ -88,7 +84,6 @@ serd_node_equals(const SerdNode* a, const SerdNode* b)
 	return (a == b)
 		|| (a->type == b->type
 		    && a->n_bytes == b->n_bytes
-		    && a->n_chars == b->n_chars
 		    && ((a->buf == b->buf) || !memcmp((const char*)a->buf,
 		                                      (const char*)b->buf,
 		                                      a->n_bytes + 1)));
@@ -221,13 +216,12 @@ serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out)
 
 	const size_t len        = serd_uri_string_length(&abs_uri);
 	uint8_t*     buf        = (uint8_t*)malloc(len + 1);
-	SerdNode     node       = { buf, 0, 0, 0, SERD_URI };
+	SerdNode     node       = { buf, len, 0, SERD_URI };
 	uint8_t*     ptr        = buf;
 	const size_t actual_len = serd_uri_serialise(&abs_uri, string_sink, &ptr);
 
 	buf[actual_len] = '\0';
 	node.n_bytes    = actual_len;
-	node.n_chars    = serd_strlen(buf, NULL, NULL);
 
 	if (out) {
 		serd_uri_parse(buf, out);  // TODO: cleverly avoid double parse
@@ -245,14 +239,13 @@ serd_node_new_relative_uri(const SerdURI* uri,
 	const size_t uri_len  = serd_uri_string_length(uri);
 	const size_t base_len = serd_uri_string_length(base);
 	uint8_t*     buf        = (uint8_t*)malloc(uri_len + base_len + 1);
-	SerdNode     node       = { buf, 0, 0, 0, SERD_URI };
+	SerdNode     node       = { buf, 0, 0, SERD_URI };
 	uint8_t*     ptr        = buf;
 	const size_t actual_len = serd_uri_serialise_relative(
 		uri, base, root, string_sink, &ptr);
 
 	buf[actual_len] = '\0';
 	node.n_bytes    = actual_len;
-	node.n_chars    = serd_strlen(buf, NULL, NULL);
 
 	if (out) {
 		serd_uri_parse(buf, out);  // TODO: cleverly avoid double parse
@@ -278,7 +271,7 @@ serd_node_new_decimal(double d, unsigned frac_digits)
 	const double   abs_d      = fabs(d);
 	const unsigned int_digits = serd_digits(abs_d);
 	char*          buf        = (char*)calloc(int_digits + frac_digits + 3, 1);
-	SerdNode       node       = { (const uint8_t*)buf, 0, 0, 0, SERD_LITERAL };
+	SerdNode       node       = { (const uint8_t*)buf, 0, 0, SERD_LITERAL };
 	const double   int_part   = floor(abs_d);
 
 	// Point s to decimal point location
@@ -301,7 +294,7 @@ serd_node_new_decimal(double d, unsigned frac_digits)
 	double frac_part = fabs(d - int_part);
 	if (frac_part < DBL_EPSILON) {
 		*s++ = '0';
-		node.n_bytes = node.n_chars = (size_t)(s - buf);
+		node.n_bytes = (size_t)(s - buf);
 	} else {
 		uint64_t frac = (uint64_t)llround(frac_part * pow(10.0, (int)frac_digits));
 		s += frac_digits - 1;
@@ -310,7 +303,7 @@ serd_node_new_decimal(double d, unsigned frac_digits)
 		// Skip trailing zeros
 		for (; i < frac_digits - 1 && !(frac % 10); ++i, --s, frac /= 10) {}
 
-		node.n_bytes = node.n_chars = (size_t)(s - buf) + 1u;
+		node.n_bytes = (size_t)(s - buf) + 1u;
 
 		// Write digits from last trailing zero to decimal point
 		for (; i < frac_digits; ++i) {
@@ -328,7 +321,7 @@ serd_node_new_integer(int64_t i)
 	int64_t        abs_i  = (i < 0) ? -i : i;
 	const unsigned digits = serd_digits((double)abs_i);
 	char*          buf    = (char*)calloc(digits + 2, 1);
-	SerdNode       node   = { (const uint8_t*)buf, 0, 0, 0, SERD_LITERAL };
+	SerdNode       node   = { (const uint8_t*)buf, 0, 0, SERD_LITERAL };
 
 	// Point s to the end
 	char* s = buf + digits - 1;
@@ -337,7 +330,7 @@ serd_node_new_integer(int64_t i)
 		++s;
 	}
 
-	node.n_bytes = node.n_chars = (size_t)(s - buf) + 1u;
+	node.n_bytes = (size_t)(s - buf) + 1u;
 
 	// Write integer part (right to left)
 	do {
@@ -352,7 +345,7 @@ serd_node_new_blob(const void* buf, size_t size, bool wrap_lines)
 {
 	const size_t len  = serd_base64_get_length(size, wrap_lines);
 	uint8_t*     str  = (uint8_t*)calloc(len + 2, 1);
-	SerdNode     node = { str, len, len, 0, SERD_LITERAL };
+	SerdNode     node = {str, len, 0, SERD_LITERAL};
 
 	if (serd_base64_encode(str, buf, size, wrap_lines)) {
 		node.flags |= SERD_HAS_NEWLINE;
diff --git a/src/reader.c b/src/reader.c
index 5d33e45d..6da29ab1 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -43,7 +43,7 @@ set_blank_id(SerdReader* reader, Ref ref, size_t buf_size)
 {
 	SerdNode*   node   = deref(reader, ref);
 	const char* prefix = reader->bprefix ? (const char*)reader->bprefix : "";
-	node->n_bytes = node->n_chars = (size_t)snprintf(
+	node->n_bytes = (size_t)snprintf(
 		(char*)node->buf, buf_size, "%sb%u", prefix, reader->next_id++);
 }
 
@@ -85,7 +85,7 @@ push_node_padded(SerdReader* reader, size_t maxlen,
 		&reader->stack, sizeof(SerdNode) + maxlen + 1, sizeof(SerdNode));
 
 	SerdNode* const node = (SerdNode*)mem;
-	node->n_bytes = node->n_chars = n_bytes;
+	node->n_bytes = n_bytes;
 	node->flags   = 0;
 	node->type    = type;
 	node->buf     = NULL;
diff --git a/src/reader.h b/src/reader.h
index 9de37f4c..684bceb2 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -165,9 +165,6 @@ push_byte(SerdReader* reader, Ref ref, const int c)
 	uint8_t* const  s    = (uint8_t*)serd_stack_push(&reader->stack, 1);
 	SerdNode* const node = (SerdNode*)(reader->stack.buf + ref);
 	++node->n_bytes;
-	if (!(c & 0x80)) {  // Starts with 0 bit, start of new character
-		++node->n_chars;
-	}
 	*(s - 1) = (uint8_t)c;
 	*s       = '\0';
 	return SERD_SUCCESS;
diff --git a/src/string.c b/src/string.c
index e1e5dbda..485d3945 100644
--- a/src/string.c
+++ b/src/string.c
@@ -18,9 +18,11 @@
 
 #include "serd/serd.h"
 
+#include <assert.h>
 #include <math.h>
 #include <stdint.h>
 #include <stdlib.h>
+#include <string.h>
 
 void
 serd_free(void* ptr)
@@ -63,46 +65,32 @@ serd_update_flags(const uint8_t c, SerdNodeFlags* const flags)
 size_t
 serd_substrlen(const uint8_t* const str,
                const size_t         len,
-               size_t* const        n_bytes,
                SerdNodeFlags* const flags)
 {
-	size_t        n_chars = 0;
-	size_t        i       = 0;
-	SerdNodeFlags f       = 0;
+	assert(flags);
+
+	size_t i = 0;
+	*flags = 0;
 	for (; i < len && str[i]; ++i) {
-		if ((str[i] & 0xC0) != 0x80) {  // Start of new character
-			++n_chars;
-			serd_update_flags(str[i], &f);
-		}
-	}
-	if (n_bytes) {
-		*n_bytes = i;
+		serd_update_flags(str[i], flags);
 	}
-	if (flags) {
-		*flags = f;
-	}
-	return n_chars;
+
+	return i;
 }
 
 size_t
-serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags)
+serd_strlen(const uint8_t* str, SerdNodeFlags* flags)
 {
-	size_t        n_chars = 0;
-	size_t        i       = 0;
-	SerdNodeFlags f       = 0;
-	for (; str[i]; ++i) {
-		if ((str[i] & 0xC0) != 0x80) {  // Start of new character
-			++n_chars;
-			serd_update_flags(str[i], &f);
-		}
-	}
-	if (n_bytes) {
-		*n_bytes = i;
-	}
 	if (flags) {
-		*flags = f;
+		size_t i = 0;
+		*flags = 0;
+		for (; str[i]; ++i) {
+			serd_update_flags(str[i], flags);
+		}
+		return i;
 	}
-	return n_chars;
+
+	return strlen((const char*)str);
 }
 
 static inline double
diff --git a/src/string_utils.h b/src/string_utils.h
index b80bf5aa..fd86b0c9 100644
--- a/src/string_utils.h
+++ b/src/string_utils.h
@@ -88,10 +88,7 @@ is_windows_path(const uint8_t* path)
 }
 
 size_t
-serd_substrlen(const uint8_t* str,
-               size_t         len,
-               size_t*        n_bytes,
-               SerdNodeFlags* flags);
+serd_substrlen(const uint8_t* str, size_t len, SerdNodeFlags* flags);
 
 static inline int
 serd_strncasecmp(const char* s1, const char* s2, size_t n)
diff --git a/src/writer.c b/src/writer.c
index de37b984..96d2e92b 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -45,9 +45,9 @@ typedef struct {
 } WriteContext;
 
 static const WriteContext WRITE_CONTEXT_NULL = {
-	{ 0, 0, 0, 0, SERD_NOTHING },
-	{ 0, 0, 0, 0, SERD_NOTHING },
-	{ 0, 0, 0, 0, SERD_NOTHING }
+	{ 0, 0, 0, SERD_NOTHING },
+	{ 0, 0, 0, SERD_NOTHING },
+	{ 0, 0, 0, SERD_NOTHING }
 };
 
 typedef enum {
@@ -165,7 +165,6 @@ copy_node(SerdNode* dst, const SerdNode* src)
 	if (src) {
 		dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1);
 		dst->n_bytes = src->n_bytes;
-		dst->n_chars = src->n_chars;
 		dst->flags   = src->flags;
 		dst->type    = src->type;
 		memcpy((char*)dst->buf, src->buf, src->n_bytes + 1);
diff --git a/tests/serd_test.c b/tests/serd_test.c
index 816a25cc..ef50ebac 100644
--- a/tests/serd_test.c
+++ b/tests/serd_test.c
@@ -247,7 +247,7 @@ test_double_to_node(void)
 			: ((const char*)node.buf == dbl_test_strs[i]);
 		assert(pass);
 		const size_t len = node.buf ? strlen((const char*)node.buf) : 0;
-		assert(node.n_bytes == len && node.n_chars == len);
+		assert(node.n_bytes == len);
 		serd_node_free(&node);
 	}
 }
@@ -267,7 +267,7 @@ test_integer_to_node(void)
 		SerdNode node = serd_node_new_integer(int_test_nums[i]);
 		assert(!strcmp((const char*)node.buf, (const char*)int_test_strs[i]));
 		const size_t len = strlen((const char*)node.buf);
-		assert(node.n_bytes == len && node.n_chars == len);
+		assert(node.n_bytes == len);
 		serd_node_free(&node);
 	}
 }
@@ -283,7 +283,6 @@ test_blob_to_node(void)
 
 		SerdNode blob = serd_node_new_blob(data, size, size % 5);
 
-		assert(blob.n_bytes == blob.n_chars);
 		assert(blob.n_bytes == strlen((const char*)blob.buf));
 
 		size_t   out_size = 0;
@@ -306,15 +305,10 @@ test_strlen(void)
 {
 	const uint8_t str[] = { '"', '5', 0xE2, 0x82, 0xAC, '"', '\n', 0 };
 
-	size_t        n_bytes = 0;
 	SerdNodeFlags flags   = 0;
-	size_t        len     = serd_strlen(str, &n_bytes, &flags);
-	assert(len == 5 && n_bytes == 7 &&
-	       flags == (SERD_HAS_QUOTE | SERD_HAS_NEWLINE));
-	len = serd_strlen(str, NULL, &flags);
-	assert(len == 5);
-
-	assert(serd_strlen(str, &n_bytes, NULL) == 5);
+	size_t        n_bytes = serd_strlen(str, &flags);
+	assert(n_bytes == 7 && flags == (SERD_HAS_QUOTE|SERD_HAS_NEWLINE));
+	assert(serd_strlen(str, NULL) == 7);
 }
 
 static void
@@ -402,8 +396,7 @@ static void
 test_node_from_string(void)
 {
 	SerdNode node = serd_node_from_string(SERD_LITERAL, (const uint8_t*)"hello\"");
-	assert(node.n_bytes == 6 && node.n_chars == 6 &&
-	       node.flags == SERD_HAS_QUOTE &&
+	assert(node.n_bytes == 6 && node.flags == SERD_HAS_QUOTE &&
 	       !strcmp((const char*)node.buf, "hello\""));
 
 	node = serd_node_from_string(SERD_URI, NULL);
@@ -414,17 +407,14 @@ static void
 test_node_from_substring(void)
 {
 	SerdNode empty = serd_node_from_substring(SERD_LITERAL, NULL, 32);
-	assert(!empty.buf && !empty.n_bytes && !empty.n_chars && !empty.flags &&
-	       !empty.type);
+	assert(!empty.buf && !empty.n_bytes && !empty.flags && !empty.type);
 
 	SerdNode a_b = serd_node_from_substring(SERD_LITERAL, USTR("a\"bc"), 3);
-	assert(a_b.n_bytes == 3 && a_b.n_chars == 3 &&
-	       a_b.flags == SERD_HAS_QUOTE &&
+	assert(a_b.n_bytes == 3 && a_b.flags == SERD_HAS_QUOTE &&
 	       !strncmp((const char*)a_b.buf, "a\"b", 3));
 
 	a_b = serd_node_from_substring(SERD_LITERAL, USTR("a\"bc"), 10);
-	assert(a_b.n_bytes == 4 && a_b.n_chars == 4 &&
-	       a_b.flags == SERD_HAS_QUOTE &&
+	assert(a_b.n_bytes == 4 && a_b.flags == SERD_HAS_QUOTE &&
 	       !strncmp((const char*)a_b.buf, "a\"bc", 4));
 }
 
-- 
cgit v1.2.1