/*
 * Patch provenance
 *   Commit:  9c29938c172e2423f67925274a18b4f1c1bb42cf
 *   From:    David Robillard
 *   Date:    Sun, 9 Jul 2017 20:09:36 +0200
 *   Subject: Factor out UTF-8 character size counting
 *
 *   src/serd_internal.h | 23 +++++++++++++++++++++++
 *   1 file changed, 23 insertions(+)
 *   index 55f6a6b6..814b9a5b 100644, hunk @@ -325,12 +325,35 @@
 *
 * The hunk starts right after the end of is_windows_path() and ends just
 * before the "URI utilities" section of src/serd_internal.h.
 */

/* String utilities */

/*
 * Unchanged context in the patch: the existing declaration
 *
 *   size_t
 *   serd_substrlen(const uint8_t* str,
 *                  const size_t   len,
 *                  size_t*        n_bytes,
 *                  SerdNodeFlags* flags);
 *
 * sits here, between the section comment and utf8_num_bytes().
 */

/**
   Return the number of bytes in the UTF-8 character that starts with `c`.

   @param c The first (leading) byte of an encoded character.

   @return 1 for a single-byte (ASCII) character, 2, 3, or 4 for a multi-byte
   leading byte, or 0 if `c` is not a valid leading byte (a continuation byte
   starting with `10', or a byte starting with `11111').  The result is the
   same whether or not HAVE_BUILTIN_CLZ is defined.
*/
static inline uint32_t
utf8_num_bytes(const uint8_t c)
{
    if ((c & 0x80) == 0) { // Starts with `0'
        return 1;
    }

#ifdef HAVE_BUILTIN_CLZ
    // Count the leading one bits of c by counting the leading zero bits of
    // its complement.  The shift is done on an unsigned 32-bit value so that
    // no negative int is ever shifted, and the complement is taken after the
    // shift so the low 24 bits are always set and the argument is never zero
    // (for which __builtin_clz is undefined).
    const uint32_t inverted = ~((uint32_t)c << 24);
    const uint32_t n_ones   = (uint32_t)__builtin_clz(inverted);

    // Only 2, 3, or 4 leading ones mark a valid leading byte; anything else
    // is reported as 0, exactly like the portable implementation below
    return (n_ones >= 2 && n_ones <= 4) ? n_ones : 0;
#else
    if ((c & 0xE0) == 0xC0) { // Starts with `110'
        return 2;
    } else if ((c & 0xF0) == 0xE0) { // Starts with `1110'
        return 3;
    } else if ((c & 0xF8) == 0xF0) { // Starts with `11110'
        return 4;
    }
    return 0;
#endif
}

/* URI utilities */