aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/serd_internal.h 24
-rw-r--r-- src/writer.c 21
2 files changed, 25 insertions, 20 deletions
diff --git a/src/serd_internal.h b/src/serd_internal.h
index 481b56fa..267ef6f6 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -356,6 +356,30 @@ utf8_num_bytes(const uint8_t c)
#endif
}
+/// Return the code point of a UTF-8 character with known length
+static inline uint32_t
+parse_counted_utf8_char(const uint8_t* utf8, size_t size)
+{
+ uint32_t c = utf8[0] & ((1 << (8 - size)) - 1);
+ for (size_t i = 1; i < size; ++i) {
+ const uint8_t in = utf8[i] & 0x3F;
+ c = (c << 6) | in;
+ }
+ return c;
+}
+
+/// Parse a UTF-8 character, set *size to the length, and return the code point
+static inline uint32_t
+parse_utf8_char(const uint8_t* utf8, size_t* size)
+{
+ switch (*size = utf8_num_bytes(utf8[0])) {
+ case 1: case 2: case 3: case 4:
+ return parse_counted_utf8_char(utf8, *size);
+ default:
+ return *size = 0;
+ }
+}
+
/* URI utilities */
static inline bool
diff --git a/src/writer.c b/src/writer.c
index 94e2c1d2..a359ee6c 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -174,32 +174,13 @@ sink(const void* buf, size_t len, SerdWriter* writer)
return serd_byte_sink_write(buf, len, &writer->byte_sink);
}
-// Parse a UTF-8 character, set *size to the length, and return the code point
-static inline uint32_t
-parse_utf8_char(SerdWriter* writer, const uint8_t* utf8, size_t* size)
-{
- switch (*size = utf8_num_bytes(utf8[0])) {
- case 1: case 2: case 3: case 4:
- break;
- default:
- return *size = 0;
- }
-
- uint32_t c = utf8[0] & ((1 << (8 - *size)) - 1);
- for (size_t i = 1; i < *size; ++i) {
- const uint8_t in = utf8[i] & 0x3F;
- c = (c << 6) | in;
- }
- return c;
-}
-
// Write a single character, as an escape for single byte characters
// (Caller prints any single byte characters that don't need escaping)
static size_t
write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size)
{
char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
- const uint32_t c = parse_utf8_char(writer, utf8, size);
+ const uint32_t c = parse_utf8_char(utf8, size);
switch (*size) {
case 0:
w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]);