aboutsummaryrefslogtreecommitdiffstats
path: root/src/writer.c
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2017-07-09 20:09:36 +0200
committer David Robillard <d@drobilla.net> 2017-07-10 12:06:56 +0200
commit 9c29938c172e2423f67925274a18b4f1c1bb42cf (patch)
tree f75ce11c9827ef51c570646b4146793b17f70c90 /src/writer.c
parent 4d535bbe0390ed4f03c611e433145c9e49cbf3ad (diff)
download serd-9c29938c172e2423f67925274a18b4f1c1bb42cf.tar.gz
serd-9c29938c172e2423f67925274a18b4f1c1bb42cf.tar.bz2
serd-9c29938c172e2423f67925274a18b4f1c1bb42cf.zip
Factor out UTF-8 character size counting
Diffstat (limited to 'src/writer.c')
-rw-r--r-- src/writer.c 24
1 files changed, 1 insertions, 23 deletions
diff --git a/src/writer.c b/src/writer.c
index 63b8d5af..c293b4f8 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -158,33 +158,11 @@ sink(const void* buf, size_t len, SerdWriter* writer)
return serd_byte_sink_write(buf, len, &writer->byte_sink);
}
-// Return the number of bytes in a UTF-8 character
-static inline uint32_t
-utf8_num_bytes(const uint8_t* utf8)
-{
- if ((utf8[0] & 0x80) == 0) { // Starts with `0'
- return 1;
- }
-
-#ifdef HAVE_BUILTIN_CLZ
- return __builtin_clz(~utf8[0] << 24);
-#else
- if ((utf8[0] & 0xE0) == 0xC0) { // Starts with `110'
- return 2;
- } else if ((utf8[0] & 0xF0) == 0xE0) { // Starts with `1110'
- return 3;
- } else if ((utf8[0] & 0xF8) == 0xF0) { // Starts with `11110'
- return 4;
- }
- return 0;
-#endif
-}
-
// Parse a UTF-8 character, set *size to the length, and return the code point
static inline uint32_t
parse_utf8_char(SerdWriter* writer, const uint8_t* utf8, size_t* size)
{
- switch (*size = utf8_num_bytes(utf8)) {
+ switch (*size = utf8_num_bytes(utf8[0])) {
case 1: case 2: case 3: case 4:
break;
default: