Make sord_node_get_string_counted return byte count

Also clarify formerly ambiguous documentation, and add sord_node_get_string_measured to get both byte and character counts. Technically, this changes existing behaviour, but most (if not all) existing code that uses this function was actually expecting a byte count. This is unsurprising, since a UTF-8 character count is almost always useless. This approach causes fewer problems in practice, since it fixes broken behaviour in the wild without requiring any changes in user code.
author: David Robillard <d@drobilla.net> 2016-03-15 14:52:47 -0400
committer: David Robillard <d@drobilla.net> 2016-03-15 14:52:47 -0400
commit: ce981fefe0c4d4f9c871b574a92eec06e3149e3e (patch)
tree: 92d7bf7e914cc3b9effc82de64350f0d1bc2bc6e /src
parent: 19cd4c95e10ec83f8fbf41809e9d3688c64bd2d8 (diff)
download: sord-ce981fefe0c4d4f9c871b574a92eec06e3149e3e.tar.gz
sord-ce981fefe0c4d4f9c871b574a92eec06e3149e3e.tar.bz2
sord-ce981fefe0c4d4f9c871b574a92eec06e3149e3e.zip
2 files changed, 36 insertions, 9 deletions
diff --git a/src/sord.c b/src/sord.c
index c6b2b38..fbc33f7 100644
--- a/src/sord.c
+++ b/src/sord.c
@@ -919,9 +919,19 @@ sord_node_get_string(const SordNode* node)
 }
 
 const uint8_t*
-sord_node_get_string_counted(const SordNode* node, size_t* len)
+sord_node_get_string_counted(const SordNode* node, size_t* bytes)
 {
-	*len = node->node.n_chars;
+	*bytes = node->node.n_bytes;
+	return node->node.buf;
+}
+
+const uint8_t*
+sord_node_get_string_measured(const SordNode* node,
+                              size_t*         bytes,
+                              size_t*         chars)
+{
+	*bytes = node->node.n_bytes;
+	*chars = node->node.n_chars;
 	return node->node.buf;
 }
 
diff --git a/src/sord_test.c b/src/sord_test.c
index ea258b0..c6e6a5b 100644
--- a/src/sord_test.c
+++ b/src/sord_test.c
@@ -515,17 +515,34 @@ main(int argc, char** argv)
 		goto fail;
 	}
 
-	size_t len;
-	const uint8_t* str = sord_node_get_string_counted(lit_id2, &len);
+	if (sord_num_nodes(world) != initial_num_nodes) {
+		return test_fail("Num nodes %zu != %zu\n",
+		                 sord_num_nodes(world), initial_num_nodes);
+	}
+
+	const uint8_t ni_hao[] = { 0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD };
+	SordNode*     chello   = sord_new_literal(world, NULL, ni_hao, "cmn");
+
+	// Test literal length
+	size_t         n_bytes;
+	size_t         n_chars;
+	const uint8_t* str = sord_node_get_string_counted(lit_id2, &n_bytes);
 	if (strcmp((const char*)str, "hello")) {
 		return test_fail("Literal node corrupt\n");
-	} else if (len != strlen("hello")) {
-		return test_fail("Literal length incorrect\n");
+	} else if (n_bytes != strlen("hello")) {
+		return test_fail("ASCII literal byte count incorrect\n");
 	}
 
-	if (sord_num_nodes(world) != initial_num_nodes) {
-		return test_fail("Num nodes %zu != %zu\n",
-		                 sord_num_nodes(world), initial_num_nodes);
+	str = sord_node_get_string_measured(lit_id2, &n_bytes, &n_chars);
+	if (n_bytes != strlen("hello") || n_chars != strlen("hello")) {
+		return test_fail("ASCII literal measured length incorrect\n");
+	}
+
+	str = sord_node_get_string_measured(chello, &n_bytes, &n_chars);
+	if (n_bytes != 6) {
+		return test_fail("Multi-byte literal byte count incorrect\n");
+	} else if (n_chars != 2) {
+		return test_fail("Multi-byte literal character count incorrect\n");
 	}
 
 	// Check interning doesn't clash non-equivalent values
author	David Robillard <d@drobilla.net>	2016-03-15 14:52:47 -0400
committer	David Robillard <d@drobilla.net>	2016-03-15 14:52:47 -0400
commit	ce981fefe0c4d4f9c871b574a92eec06e3149e3e (patch)
tree	92d7bf7e914cc3b9effc82de64350f0d1bc2bc6e /src
parent	19cd4c95e10ec83f8fbf41809e9d3688c64bd2d8 (diff)
download	sord-ce981fefe0c4d4f9c871b574a92eec06e3149e3e.tar.gz sord-ce981fefe0c4d4f9c871b574a92eec06e3149e3e.tar.bz2 sord-ce981fefe0c4d4f9c871b574a92eec06e3149e3e.zip