diff options
-rw-r--r-- | src/read_utf8.c | 16 | ||||
-rw-r--r-- | src/string_utils.h | 8 | ||||
-rw-r--r-- | src/writer.c | 12 |
3 files changed, 19 insertions, 17 deletions
diff --git a/src/read_utf8.c b/src/read_utf8.c index c6a24778..fb8ed0e2 100644 --- a/src/read_utf8.c +++ b/src/read_utf8.c @@ -8,6 +8,8 @@ #include <stdio.h> +#define MAX_UTF8_BYTES 4U + static SerdStatus skip_invalid_utf8(SerdReader* const reader) { @@ -28,8 +30,8 @@ bad_char(SerdReader* const reader, const char* const fmt, const uint8_t c) static SerdStatus read_utf8_continuation_bytes(SerdReader* const reader, - uint8_t bytes[4], - uint32_t* const size, + uint8_t bytes[static MAX_UTF8_BYTES], + uint8_t* const size, const uint8_t lead) { *size = utf8_num_bytes(lead); @@ -39,7 +41,7 @@ read_utf8_continuation_bytes(SerdReader* const reader, bytes[0] = lead; - for (uint32_t i = 1U; i < *size; ++i) { + for (uint8_t i = 1U; i < *size; ++i) { const int b = peek_byte(reader); if (b == EOF) { return r_err(reader, SERD_NO_DATA, "unexpected end of input"); @@ -62,8 +64,8 @@ read_utf8_continuation(SerdReader* const reader, SerdNode* const dest, const uint8_t lead) { - uint32_t size = 0; - uint8_t bytes[8] = {lead, 0U, 0U, 0U, 0U, 0U, 0U, 0U}; + uint8_t size = 0; + uint8_t bytes[MAX_UTF8_BYTES] = {lead, 0U, 0U, 0U}; SerdStatus st = read_utf8_continuation_bytes(reader, bytes, &size, lead); if (st) { @@ -79,8 +81,8 @@ read_utf8_code_point(SerdReader* const reader, uint32_t* const code, const uint8_t lead) { - uint32_t size = 0U; - uint8_t bytes[8] = {lead, 0U, 0U, 0U, 0U, 0U, 0U, 0U}; + uint8_t size = 0U; + uint8_t bytes[MAX_UTF8_BYTES] = {lead, 0U, 0U, 0U}; *code = 0U; diff --git a/src/string_utils.h b/src/string_utils.h index 564c58ad..8f7ea083 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -104,7 +104,7 @@ serd_strncasecmp(const char* s1, const char* s2, size_t n) return 0; } -static inline uint32_t +static inline uint8_t utf8_num_bytes(const uint8_t leading) { return ((leading & 0x80U) == 0x00U) ? 1U // Starts with `0' @@ -114,7 +114,7 @@ utf8_num_bytes(const uint8_t leading) : 0U; // Invalid } -static inline unsigned +static inline uint8_t utf8_num_bytes_for_codepoint(const uint32_t code) { return (code < 0x00000080) ? 1U @@ -126,7 +126,7 @@ utf8_num_bytes_for_codepoint(const uint32_t code) /// Return the code point of a UTF-8 character with known length static inline uint32_t -parse_counted_utf8_char(const uint8_t* const utf8, const size_t size) +parse_counted_utf8_char(const uint8_t* const utf8, const uint8_t size) { uint32_t c = utf8[0] & ((1U << (8U - size)) - 1U); @@ -139,7 +139,7 @@ parse_counted_utf8_char(const uint8_t* const utf8, const size_t size) /// Parse a UTF-8 character, set *size to the length, and return the code point static inline uint32_t -parse_utf8_char(const uint8_t* const utf8, size_t* const size) +parse_utf8_char(const uint8_t* const utf8, uint8_t* const size) { switch (*size = utf8_num_bytes(utf8[0])) { case 1: diff --git a/src/writer.c b/src/writer.c index 60c17e11..be199af4 100644 --- a/src/writer.c +++ b/src/writer.c @@ -284,7 +284,7 @@ esink(const void* buf, size_t len, SerdWriter* writer) static size_t write_character(SerdWriter* const writer, const uint8_t* const utf8, - size_t* const size, + uint8_t* const size, SerdStatus* const st) { char escape[11] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; @@ -359,7 +359,7 @@ write_uri(SerdWriter* writer, const char* utf8, size_t n_bytes, SerdStatus* st) } // Write UTF-8 character - size_t size = 0; + uint8_t size = 0; len += write_character(writer, (const uint8_t*)utf8 + i, &size, st); i += size; if (*st && !(writer->flags & SERD_WRITE_LAX)) { @@ -447,7 +447,7 @@ write_lname(SerdWriter* writer, const char* utf8, const size_t n_bytes) sets of valid characters. */ // Write first character - size_t first_size = 0U; + uint8_t first_size = 0U; const int first = (int)parse_utf8_char((const uint8_t*)utf8, &first_size); if (is_PN_CHARS_U(first) || first == ':' || is_digit(first)) { TRY(st, esink(utf8, first_size, writer)); @@ -457,7 +457,7 @@ write_lname(SerdWriter* writer, const char* utf8, const size_t n_bytes) // Write middle and last characters for (size_t i = first_size; i < n_bytes;) { - size_t c_size = 0U; + uint8_t c_size = 0U; const int c = (int)parse_utf8_char((const uint8_t*)utf8 + i, &c_size); if (is_PN_CHARS(c) || c == ':' || (c == '.' && (i + 1U < n_bytes))) { @@ -581,7 +581,7 @@ write_text(SerdWriter* writer, if (escape_len == 0) { // No special escape for this character, write full Unicode escape - size_t size = 0; + uint8_t size = 0; write_character(writer, (const uint8_t*)utf8 + i - 1, &size, &st); if (st && !(writer->flags & SERD_WRITE_LAX)) { return st; @@ -593,7 +593,7 @@ write_text(SerdWriter* writer, for (; i < n_bytes && (utf8[i] & 0x80); ++i) { } } else { - i += size - 1; + i += size - 1U; } } } |