diff options
author | David Robillard <d@drobilla.net> | 2017-07-09 20:09:36 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2017-07-10 12:06:56 +0200 |
commit | 9c29938c172e2423f67925274a18b4f1c1bb42cf (patch) | |
tree | f75ce11c9827ef51c570646b4146793b17f70c90 /src/serd_internal.h | |
parent | 4d535bbe0390ed4f03c611e433145c9e49cbf3ad (diff) | |
download | serd-9c29938c172e2423f67925274a18b4f1c1bb42cf.tar.gz serd-9c29938c172e2423f67925274a18b4f1c1bb42cf.tar.bz2 serd-9c29938c172e2423f67925274a18b4f1c1bb42cf.zip |
Factor out UTF-8 character size counting
Diffstat (limited to 'src/serd_internal.h')
-rw-r--r-- | src/serd_internal.h | 23 |
1 files changed, 23 insertions, 0 deletions
/**
   Return the number of bytes in the UTF-8 character that starts with `c`.

   @param c The first (leading) byte of an encoded character.
   @return 1 if `c` starts with `0`, 2 for `110`, 3 for `1110`, 4 for `11110`,
   or 0 if `c` has any other bit prefix (a `10` continuation byte, or five or
   more leading set bits).  Both implementations below return the same value
   for every input.
*/
static inline uint32_t
utf8_num_bytes(const uint8_t c)
{
	if ((c & 0x80) == 0) { // Starts with `0'
		return 1;
	}

#ifdef HAVE_BUILTIN_CLZ
	// Count leading set bits by inverting and counting leading zeros.  The
	// byte is widened to unsigned *before* shifting and inverting: `~c << 24`
	// would left-shift a negative int (undefined behaviour), and could pass
	// zero to __builtin_clz (also undefined) for c == 0xFF.  Inverting after
	// the shift leaves the low 24 bits set, so the argument is never zero.
	const uint32_t n_ones = (uint32_t)__builtin_clz(~((uint32_t)c << 24));

	// One leading set bit is a continuation byte, more than four is not a
	// valid leading byte; report both as 0 like the portable branch does.
	return (n_ones >= 2 && n_ones <= 4) ? n_ones : 0;
#else
	if ((c & 0xE0) == 0xC0) { // Starts with `110'
		return 2;
	} else if ((c & 0xF0) == 0xE0) { // Starts with `1110'
		return 3;
	} else if ((c & 0xF8) == 0xF0) { // Starts with `11110'
		return 4;
	}
	return 0;
#endif
}