about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: David Robillard <d@drobilla.net> 2024-09-27 13:06:07 -0400
committer: David Robillard <d@drobilla.net> 2024-09-27 13:06:07 -0400
commit: a4acf0c7414451d22b6264f2fabfa5eb348fbb62 (patch)
tree: 2856d682de758070f514fa08b53dcee7b7cf200a
parent: 1dd97fa51d474520c9b8ca002b58603e4234abab (diff)
download: serd-a4acf0c7414451d22b6264f2fabfa5eb348fbb62.tar.gz
serd-a4acf0c7414451d22b6264f2fabfa5eb348fbb62.tar.bz2
serd-a4acf0c7414451d22b6264f2fabfa5eb348fbb62.zip
Use tighter types for UTF-8
-rw-r--r-- src/n3.c 10
-rw-r--r-- src/string_utils.h 8
-rw-r--r-- src/writer.c 14
3 files changed, 16 insertions, 16 deletions
diff --git a/src/n3.c b/src/n3.c
index e5a06c77..b3cfbb8a 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -172,7 +172,7 @@ bad_char(SerdReader* const reader, const char* const fmt, const uint8_t c)
static SerdStatus
read_utf8_bytes(SerdReader* const reader,
uint8_t bytes[4],
- uint32_t* const size,
+ uint8_t* const size,
const uint8_t c)
{
*size = utf8_num_bytes(c);
@@ -181,9 +181,9 @@ read_utf8_bytes(SerdReader* const reader,
}
bytes[0] = c;
- for (unsigned i = 1; i < *size; ++i) {
+ for (uint8_t i = 1U; i < *size; ++i) {
const int b = peek_byte(reader);
- if (b == EOF || ((uint8_t)b & 0x80) == 0) {
+ if (b == EOF || ((uint8_t)b & 0x80U) == 0U) {
return bad_char(reader, "invalid UTF-8 continuation 0x%X\n", (uint8_t)b);
}
@@ -196,7 +196,7 @@ read_utf8_bytes(SerdReader* const reader,
static SerdStatus
read_utf8_character(SerdReader* const reader, const Ref dest, const uint8_t c)
{
- uint32_t size = 0;
+ uint8_t size = 0U;
uint8_t bytes[4] = {0, 0, 0, 0};
SerdStatus st = read_utf8_bytes(reader, bytes, &size, c);
if (st) {
@@ -214,7 +214,7 @@ read_utf8_code(SerdReader* const reader,
uint32_t* const code,
const uint8_t c)
{
- uint32_t size = 0;
+ uint8_t size = 0U;
uint8_t bytes[4] = {0, 0, 0, 0};
SerdStatus st = read_utf8_bytes(reader, bytes, &size, c);
if (st) {
diff --git a/src/string_utils.h b/src/string_utils.h
index 2ce90ac9..7c8348ca 100644
--- a/src/string_utils.h
+++ b/src/string_utils.h
@@ -107,7 +107,7 @@ serd_strcasecmp(const char* s1, const char* s2)
return (c1 == c2) ? 0 : (c1 < c2) ? -1 : +1;
}
-static inline uint32_t
+static inline uint8_t
utf8_num_bytes(const uint8_t leading)
{
return ((leading & 0x80U) == 0x00U) ? 1U // Starts with `0'
@@ -119,18 +119,18 @@ utf8_num_bytes(const uint8_t leading)
/// Return the code point of a UTF-8 character with known length
static inline uint32_t
-parse_counted_utf8_char(const uint8_t* utf8, size_t size)
+parse_counted_utf8_char(const uint8_t* const utf8, const uint8_t size)
{
uint32_t c = utf8[0] & ((1U << (8U - size)) - 1U);
for (size_t i = 1; i < size; ++i) {
- c = (c << 6) | (utf8[i] & 0x3FU);
+ c = (c << 6U) | (utf8[i] & 0x3FU);
}
return c;
}
/// Parse a UTF-8 character, set *size to the length, and return the code point
static inline uint32_t
-parse_utf8_char(const uint8_t* utf8, size_t* size)
+parse_utf8_char(const uint8_t* const utf8, uint8_t* const size)
{
switch (*size = utf8_num_bytes(utf8[0])) {
case 1:
diff --git a/src/writer.c b/src/writer.c
index e4ef5651..c75d3fb7 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -249,7 +249,7 @@ esink(const void* buf, size_t len, SerdWriter* writer)
static size_t
write_character(SerdWriter* writer,
const uint8_t* utf8,
- size_t* size,
+ uint8_t* size,
SerdStatus* st)
{
char escape[11] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
@@ -315,14 +315,14 @@ write_uri(SerdWriter* writer,
}
// Write UTF-8 character
- size_t size = 0;
+ uint8_t size = 0U;
len += write_character(writer, utf8 + i, &size, st);
i += size;
if (*st && (writer->style & SERD_STYLE_STRICT)) {
break;
}
- if (size == 0) {
+ if (!size) {
// Corrupt input, write percent-encoded bytes and scan to next start
char escape[4] = {0, 0, 0, 0};
for (; i < n_bytes && (utf8[i] & 0x80); ++i) {
@@ -484,19 +484,19 @@ write_text(SerdWriter* writer,
}
// Write UTF-8 character
- size_t size = 0;
+ uint8_t size = 0U;
write_character(writer, utf8 + i - 1, &size, &st);
if (st && (writer->style & SERD_STYLE_STRICT)) {
return st;
}
- if (size == 0) {
+ if (!size) {
// Corrupt input, write replacement character and scan to the next start
st = esink(replacement_char, sizeof(replacement_char), writer);
- for (; i < n_bytes && (utf8[i] & 0x80); ++i) {
+ for (; i < n_bytes && (utf8[i] & 0x80U); ++i) {
}
} else {
- i += size - 1;
+ i += size - 1U;
}
}