about | summary | refs | log | tree | commit | diff | stats
path: root/src/serd_internal.h
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2017-07-09 20:09:36 +0200
committer David Robillard <d@drobilla.net> 2017-07-10 12:06:56 +0200
commit 9c29938c172e2423f67925274a18b4f1c1bb42cf (patch)
tree f75ce11c9827ef51c570646b4146793b17f70c90 /src/serd_internal.h
parent 4d535bbe0390ed4f03c611e433145c9e49cbf3ad (diff)
download serd-9c29938c172e2423f67925274a18b4f1c1bb42cf.tar.gz
serd-9c29938c172e2423f67925274a18b4f1c1bb42cf.tar.bz2
serd-9c29938c172e2423f67925274a18b4f1c1bb42cf.zip
Factor out UTF-8 character size counting
Diffstat (limited to 'src/serd_internal.h')
-rw-r--r-- src/serd_internal.h 23
1 files changed, 23 insertions, 0 deletions
diff --git a/src/serd_internal.h b/src/serd_internal.h
index 55f6a6b6..814b9a5b 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -325,12 +325,35 @@ is_windows_path(const uint8_t* path)
&& (path[2] == '/' || path[2] == '\\');
}
+/* String utilities */
+
/**
   Prototype of serd_substrlen(); only the declaration is visible here.

   NOTE(review): the notes below are inferred from the signature and from the
   "String utilities" grouping alone -- confirm them against the definition.

   @param str Read-only bytes to examine.
   @param len Limit on how much of `str` is examined, presumably in bytes
   -- TODO confirm.
   @param n_bytes Non-const pointer, presumably an output that receives a
   byte count -- TODO confirm whether NULL is accepted.
   @param flags Non-const pointer, presumably an output that receives node
   flags -- TODO confirm whether NULL is accepted.
   @return A size_t, presumably the measured length -- TODO confirm the unit.
*/
size_t
serd_substrlen(const uint8_t* str,
const size_t len,
size_t* n_bytes,
SerdNodeFlags* flags);
/**
   Return the size in bytes of the UTF-8 sequence that starts with `c`.

   Only the pattern of leading bits in `c` is inspected; no check is made for
   overlong or out-of-range encodings.  Both build configurations (with and
   without HAVE_BUILTIN_CLZ) return the same value for every input.

   @param c The first byte of a UTF-8 encoded character.
   @return 1 if `c` starts with `0', 2, 3, or 4 if `c` starts with `110',
   `1110', or `11110' respectively, or 0 if `c` can not start a sequence
   (a `10' continuation byte, or a byte with five or more leading ones).
*/
static inline uint32_t
utf8_num_bytes(const uint8_t c)
{
    if ((c & 0x80) == 0) {  // Starts with `0'
        return 1;
    }

#ifdef HAVE_BUILTIN_CLZ
    // The number of leading ones in c is the number of leading zeros in its
    // complement.  The byte is widened to unsigned before shifting, since ~c
    // alone promotes to a negative int and shifting that is undefined.
    // Complementing after the shift also sets the low 24 bits, so the
    // argument is never zero (for which __builtin_clz is undefined).
    const uint32_t n_ones = (uint32_t)__builtin_clz(~((uint32_t)c << 24));

    // One leading one is a continuation byte, five or more is not UTF-8
    return (n_ones >= 2 && n_ones <= 4) ? n_ones : 0;
#else
    if ((c & 0xE0) == 0xC0) {  // Starts with `110'
        return 2;
    } else if ((c & 0xF0) == 0xE0) {  // Starts with `1110'
        return 3;
    } else if ((c & 0xF8) == 0xF0) {  // Starts with `11110'
        return 4;
    }
    return 0;
#endif
}
+
/* URI utilities */
static inline bool