diff options
-rw-r--r-- | src/serd_internal.h | 24 | ||||
-rw-r--r-- | src/writer.c | 21 |
2 files changed, 25 insertions, 20 deletions
diff --git a/src/serd_internal.h b/src/serd_internal.h
index 481b56fa..267ef6f6 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -356,6 +356,30 @@ utf8_num_bytes(const uint8_t c)
 #endif
 }
 
+/// Return the code point of a UTF-8 character with known length
+static inline uint32_t
+parse_counted_utf8_char(const uint8_t* utf8, size_t size)
+{
+	uint32_t c = utf8[0] & ((1 << (8 - size)) - 1);
+	for (size_t i = 1; i < size; ++i) {
+		const uint8_t in = utf8[i] & 0x3F;
+		c = (c << 6) | in;
+	}
+	return c;
+}
+
+/// Parse a UTF-8 character, set *size to the length, and return the code point
+static inline uint32_t
+parse_utf8_char(const uint8_t* utf8, size_t* size)
+{
+	switch (*size = utf8_num_bytes(utf8[0])) {
+	case 1: case 2: case 3: case 4:
+		return parse_counted_utf8_char(utf8, *size);
+	default:
+		return *size = 0;
+	}
+}
+
 /* URI utilities */
 
 static inline bool
diff --git a/src/writer.c b/src/writer.c
index 94e2c1d2..a359ee6c 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -174,32 +174,13 @@ sink(const void* buf, size_t len, SerdWriter* writer)
 	return serd_byte_sink_write(buf, len, &writer->byte_sink);
 }
 
-// Parse a UTF-8 character, set *size to the length, and return the code point
-static inline uint32_t
-parse_utf8_char(SerdWriter* writer, const uint8_t* utf8, size_t* size)
-{
-	switch (*size = utf8_num_bytes(utf8[0])) {
-	case 1: case 2: case 3: case 4:
-		break;
-	default:
-		return *size = 0;
-	}
-
-	uint32_t c = utf8[0] & ((1 << (8 - *size)) - 1);
-	for (size_t i = 1; i < *size; ++i) {
-		const uint8_t in = utf8[i] & 0x3F;
-		c = (c << 6) | in;
-	}
-	return c;
-}
-
 // Write a single character, as an escape for single byte characters
 // (Caller prints any single byte characters that don't need escaping)
 static size_t
 write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size)
 {
 	char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-	const uint32_t c = parse_utf8_char(writer, utf8, size);
+	const uint32_t c = parse_utf8_char(utf8, size);
 	switch (*size) {
 	case 0:
 		w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]);