From a4acf0c7414451d22b6264f2fabfa5eb348fbb62 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 27 Sep 2024 13:06:07 -0400 Subject: Use tighter types for UTF-8 --- src/n3.c | 10 +++++----- src/string_utils.h | 8 ++++---- src/writer.c | 14 +++++++------- 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/n3.c b/src/n3.c index e5a06c77..b3cfbb8a 100644 --- a/src/n3.c +++ b/src/n3.c @@ -172,7 +172,7 @@ bad_char(SerdReader* const reader, const char* const fmt, const uint8_t c) static SerdStatus read_utf8_bytes(SerdReader* const reader, uint8_t bytes[4], - uint32_t* const size, + uint8_t* const size, const uint8_t c) { *size = utf8_num_bytes(c); @@ -181,9 +181,9 @@ read_utf8_bytes(SerdReader* const reader, } bytes[0] = c; - for (unsigned i = 1; i < *size; ++i) { + for (uint8_t i = 1U; i < *size; ++i) { const int b = peek_byte(reader); - if (b == EOF || ((uint8_t)b & 0x80) == 0) { + if (b == EOF || ((uint8_t)b & 0x80U) == 0U) { return bad_char(reader, "invalid UTF-8 continuation 0x%X\n", (uint8_t)b); } @@ -196,7 +196,7 @@ read_utf8_bytes(SerdReader* const reader, static SerdStatus read_utf8_character(SerdReader* const reader, const Ref dest, const uint8_t c) { - uint32_t size = 0; + uint8_t size = 0U; uint8_t bytes[4] = {0, 0, 0, 0}; SerdStatus st = read_utf8_bytes(reader, bytes, &size, c); if (st) { @@ -214,7 +214,7 @@ read_utf8_code(SerdReader* const reader, uint32_t* const code, const uint8_t c) { - uint32_t size = 0; + uint8_t size = 0U; uint8_t bytes[4] = {0, 0, 0, 0}; SerdStatus st = read_utf8_bytes(reader, bytes, &size, c); if (st) { diff --git a/src/string_utils.h b/src/string_utils.h index 2ce90ac9..7c8348ca 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -107,7 +107,7 @@ serd_strcasecmp(const char* s1, const char* s2) return (c1 == c2) ? 0 : (c1 < c2) ? -1 : +1; } -static inline uint32_t +static inline uint8_t utf8_num_bytes(const uint8_t leading) { return ((leading & 0x80U) == 0x00U) ? 1U // Starts with `0' @@ -119,18 +119,18 @@ utf8_num_bytes(const uint8_t leading) /// Return the code point of a UTF-8 character with known length static inline uint32_t -parse_counted_utf8_char(const uint8_t* utf8, size_t size) +parse_counted_utf8_char(const uint8_t* const utf8, const uint8_t size) { uint32_t c = utf8[0] & ((1U << (8U - size)) - 1U); for (size_t i = 1; i < size; ++i) { - c = (c << 6) | (utf8[i] & 0x3FU); + c = (c << 6U) | (utf8[i] & 0x3FU); } return c; } /// Parse a UTF-8 character, set *size to the length, and return the code point static inline uint32_t -parse_utf8_char(const uint8_t* utf8, size_t* size) +parse_utf8_char(const uint8_t* const utf8, uint8_t* const size) { switch (*size = utf8_num_bytes(utf8[0])) { case 1: diff --git a/src/writer.c b/src/writer.c index e4ef5651..c75d3fb7 100644 --- a/src/writer.c +++ b/src/writer.c @@ -249,7 +249,7 @@ esink(const void* buf, size_t len, SerdWriter* writer) static size_t write_character(SerdWriter* writer, const uint8_t* utf8, - size_t* size, + uint8_t* size, SerdStatus* st) { char escape[11] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; @@ -315,14 +315,14 @@ write_uri(SerdWriter* writer, } // Write UTF-8 character - size_t size = 0; + uint8_t size = 0U; len += write_character(writer, utf8 + i, &size, st); i += size; if (*st && (writer->style & SERD_STYLE_STRICT)) { break; } - if (size == 0) { + if (!size) { // Corrupt input, write percent-encoded bytes and scan to next start char escape[4] = {0, 0, 0, 0}; for (; i < n_bytes && (utf8[i] & 0x80); ++i) { @@ -484,19 +484,19 @@ write_text(SerdWriter* writer, } // Write UTF-8 character - size_t size = 0; + uint8_t size = 0U; write_character(writer, utf8 + i - 1, &size, &st); if (st && (writer->style & SERD_STYLE_STRICT)) { return st; } - if (size == 0) { + if (!size) { // Corrupt input, write replacement character and scan to the next start st = esink(replacement_char, sizeof(replacement_char), writer); - for (; i < n_bytes && (utf8[i] & 0x80); ++i) { + for (; i < n_bytes && (utf8[i] & 0x80U); ++i) { } } else { - i += size - 1; + i += size - 1U; } } -- cgit v1.2.1