about summary refs log tree commit diff stats
path: root/src/string_utils.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/string_utils.h')
-rw-r--r-- src/string_utils.h 28
1 files changed, 26 insertions, 2 deletions
diff --git a/src/string_utils.h b/src/string_utils.h
index 5eeabc6b..a411b90d 100644
--- a/src/string_utils.h
+++ b/src/string_utils.h
@@ -152,20 +152,44 @@ utf8_num_bytes(const uint8_t leading)
return lengths[leading >> 3u];
}
+static inline unsigned
+utf8_num_bytes_for_codepoint(const uint32_t code)
+{ // Returns the UTF-8 encoded length in bytes for code, or 0 if code is above U+10FFFF
+ if (code < 0x00000080) { // U+0000..U+007F
+ return 1u;
+ }
+
+ if (code < 0x00000800) { // U+0080..U+07FF
+ return 2u;
+ }
+
+ if (code < 0x00010000) { // U+0800..U+FFFF; surrogates (U+D800..U+DFFF) are not rejected here
+ return 3u;
+ }
+
+ if (code < 0x00110000) { // U+10000..U+10FFFF
+ return 4u;
+ }
+
+ return 0u; // Out of range
+}
+
/// Return the code point of a UTF-8 character with known length
static inline uint32_t
-parse_counted_utf8_char(const uint8_t* utf8, size_t size)
+parse_counted_utf8_char(const uint8_t* const utf8, const size_t size)
{ // NOTE(review): reads utf8[0..size-1] with no bounds or validity checks; presumably size is 1..4 — confirm against callers
uint32_t c = utf8[0] & ((1u << (8u - size)) - 1u); // Keep the low (8 - size) bits of the leading byte
+
for (size_t i = 1; i < size; ++i) {
c = (c << 6) | (utf8[i] & 0x3Fu); // Shift in the low 6 bits of each following byte
}
+
return c;
}
/// Parse a UTF-8 character, set *size to the length, and return the code point
static inline uint32_t
-parse_utf8_char(const uint8_t* utf8, size_t* size)
+parse_utf8_char(const uint8_t* const utf8, size_t* const size)
{
switch (*size = utf8_num_bytes(utf8[0])) {
case 1: