diff options
author | David Robillard <d@drobilla.net> | 2023-03-31 10:50:12 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:07 -0500 |
commit | d22653dfe356e3da1354cdb0f7915e29c4a33e3b (patch) | |
tree | 496e678ba61a436e2bf0b11b079bf6115ba630fa /src/string_utils.h | |
parent | 469034ec4ae5c0b5230ca30c40aaa9b1432c13a2 (diff) | |
download | serd-d22653dfe356e3da1354cdb0f7915e29c4a33e3b.tar.gz serd-d22653dfe356e3da1354cdb0f7915e29c4a33e3b.tar.bz2 serd-d22653dfe356e3da1354cdb0f7915e29c4a33e3b.zip |
Factor out NTriples reader
Diffstat (limited to 'src/string_utils.h')
-rw-r--r-- | src/string_utils.h | 16 |
1 files changed, 14 insertions, 2 deletions
diff --git a/src/string_utils.h b/src/string_utils.h index 4102a54c..564c58ad 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -114,20 +114,32 @@ utf8_num_bytes(const uint8_t leading) : 0U; // Invalid } +static inline unsigned +utf8_num_bytes_for_codepoint(const uint32_t code) +{ + return (code < 0x00000080) ? 1U + : (code < 0x00000800) ? 2U + : (code < 0x00010000) ? 3U + : (code < 0x00110000) ? 4U + : 0U; +} + /// Return the code point of a UTF-8 character with known length static inline uint32_t -parse_counted_utf8_char(const uint8_t* utf8, size_t size) +parse_counted_utf8_char(const uint8_t* const utf8, const size_t size) { uint32_t c = utf8[0] & ((1U << (8U - size)) - 1U); + for (size_t i = 1; i < size; ++i) { c = (c << 6) | (utf8[i] & 0x3FU); } + return c; } /// Parse a UTF-8 character, set *size to the length, and return the code point static inline uint32_t -parse_utf8_char(const uint8_t* utf8, size_t* size) +parse_utf8_char(const uint8_t* const utf8, size_t* const size) { switch (*size = utf8_num_bytes(utf8[0])) { case 1: |