Use tighter types for UTF-8

author: David Robillard <d@drobilla.net> 2024-09-27 13:06:07 -0400
committer: David Robillard <d@drobilla.net> 2024-09-27 13:06:07 -0400
commit: a4acf0c7414451d22b6264f2fabfa5eb348fbb62 (patch)
tree: 2856d682de758070f514fa08b53dcee7b7cf200a
parent: 1dd97fa51d474520c9b8ca002b58603e4234abab (diff)
download: serd-a4acf0c7414451d22b6264f2fabfa5eb348fbb62.tar.gz
serd-a4acf0c7414451d22b6264f2fabfa5eb348fbb62.tar.bz2
serd-a4acf0c7414451d22b6264f2fabfa5eb348fbb62.zip
3 files changed, 16 insertions, 16 deletions
diff --git a/src/n3.c b/src/n3.c
index e5a06c77..b3cfbb8a 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -172,7 +172,7 @@ bad_char(SerdReader* const reader, const char* const fmt, const uint8_t c)
 static SerdStatus
 read_utf8_bytes(SerdReader* const reader,
                 uint8_t           bytes[4],
-                uint32_t* const   size,
+                uint8_t* const    size,
                 const uint8_t     c)
 {
   *size = utf8_num_bytes(c);
@@ -181,9 +181,9 @@ read_utf8_bytes(SerdReader* const reader,
   }
 
   bytes[0] = c;
-  for (unsigned i = 1; i < *size; ++i) {
+  for (uint8_t i = 1U; i < *size; ++i) {
     const int b = peek_byte(reader);
-    if (b == EOF || ((uint8_t)b & 0x80) == 0) {
+    if (b == EOF || ((uint8_t)b & 0x80U) == 0U) {
       return bad_char(reader, "invalid UTF-8 continuation 0x%X\n", (uint8_t)b);
     }
 
@@ -196,7 +196,7 @@ read_utf8_bytes(SerdReader* const reader,
 static SerdStatus
 read_utf8_character(SerdReader* const reader, const Ref dest, const uint8_t c)
 {
-  uint32_t   size     = 0;
+  uint8_t    size     = 0U;
   uint8_t    bytes[4] = {0, 0, 0, 0};
   SerdStatus st       = read_utf8_bytes(reader, bytes, &size, c);
   if (st) {
@@ -214,7 +214,7 @@ read_utf8_code(SerdReader* const reader,
                uint32_t* const   code,
                const uint8_t     c)
 {
-  uint32_t   size     = 0;
+  uint8_t    size     = 0U;
   uint8_t    bytes[4] = {0, 0, 0, 0};
   SerdStatus st       = read_utf8_bytes(reader, bytes, &size, c);
   if (st) {
diff --git a/src/string_utils.h b/src/string_utils.h
index 2ce90ac9..7c8348ca 100644
--- a/src/string_utils.h
+++ b/src/string_utils.h
@@ -107,7 +107,7 @@ serd_strcasecmp(const char* s1, const char* s2)
   return (c1 == c2) ? 0 : (c1 < c2) ? -1 : +1;
 }
 
-static inline uint32_t
+static inline uint8_t
 utf8_num_bytes(const uint8_t leading)
 {
   return ((leading & 0x80U) == 0x00U)   ? 1U  // Starts with `0'
@@ -119,18 +119,18 @@ utf8_num_bytes(const uint8_t leading)
 
 /// Return the code point of a UTF-8 character with known length
 static inline uint32_t
-parse_counted_utf8_char(const uint8_t* utf8, size_t size)
+parse_counted_utf8_char(const uint8_t* const utf8, const uint8_t size)
 {
   uint32_t c = utf8[0] & ((1U << (8U - size)) - 1U);
   for (size_t i = 1; i < size; ++i) {
-    c = (c << 6) | (utf8[i] & 0x3FU);
+    c = (c << 6U) | (utf8[i] & 0x3FU);
   }
   return c;
 }
 
 /// Parse a UTF-8 character, set *size to the length, and return the code point
 static inline uint32_t
-parse_utf8_char(const uint8_t* utf8, size_t* size)
+parse_utf8_char(const uint8_t* const utf8, uint8_t* const size)
 {
   switch (*size = utf8_num_bytes(utf8[0])) {
   case 1:
diff --git a/src/writer.c b/src/writer.c
index e4ef5651..c75d3fb7 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -249,7 +249,7 @@ esink(const void* buf, size_t len, SerdWriter* writer)
 static size_t
 write_character(SerdWriter*    writer,
                 const uint8_t* utf8,
-                size_t*        size,
+                uint8_t*       size,
                 SerdStatus*    st)
 {
   char           escape[11] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
@@ -315,14 +315,14 @@ write_uri(SerdWriter*    writer,
     }
 
     // Write UTF-8 character
-    size_t size = 0;
+    uint8_t size = 0U;
     len += write_character(writer, utf8 + i, &size, st);
     i += size;
     if (*st && (writer->style & SERD_STYLE_STRICT)) {
       break;
     }
 
-    if (size == 0) {
+    if (!size) {
       // Corrupt input, write percent-encoded bytes and scan to next start
       char escape[4] = {0, 0, 0, 0};
       for (; i < n_bytes && (utf8[i] & 0x80); ++i) {
@@ -484,19 +484,19 @@ write_text(SerdWriter*    writer,
     }
 
     // Write UTF-8 character
-    size_t size = 0;
+    uint8_t size = 0U;
     write_character(writer, utf8 + i - 1, &size, &st);
     if (st && (writer->style & SERD_STYLE_STRICT)) {
       return st;
     }
 
-    if (size == 0) {
+    if (!size) {
       // Corrupt input, write replacement character and scan to the next start
       st = esink(replacement_char, sizeof(replacement_char), writer);
-      for (; i < n_bytes && (utf8[i] & 0x80); ++i) {
+      for (; i < n_bytes && (utf8[i] & 0x80U); ++i) {
       }
     } else {
-      i += size - 1;
+      i += size - 1U;
     }
   }
author	David Robillard <d@drobilla.net>	2024-09-27 13:06:07 -0400
committer	David Robillard <d@drobilla.net>	2024-09-27 13:06:07 -0400
commit	a4acf0c7414451d22b6264f2fabfa5eb348fbb62 (patch)
tree	2856d682de758070f514fa08b53dcee7b7cf200a
parent	1dd97fa51d474520c9b8ca002b58603e4234abab (diff)
download	serd-a4acf0c7414451d22b6264f2fabfa5eb348fbb62.tar.gz serd-a4acf0c7414451d22b6264f2fabfa5eb348fbb62.tar.bz2 serd-a4acf0c7414451d22b6264f2fabfa5eb348fbb62.zip