Diffstat (limited to 'src')
-rw-r--r-- | src/digest.c | 272
1 file changed, 172 insertions, 100 deletions
diff --git a/src/digest.c b/src/digest.c
index c14a3f5..32cae63 100644
--- a/src/digest.c
+++ b/src/digest.c
@@ -1,5 +1,5 @@
 /*
-  Copyright 2012-2020 David Robillard <d@drobilla.net>
+  Copyright 2012-2021 David Robillard <d@drobilla.net>
 
   Permission to use, copy, modify, and/or distribute this software for any
   purpose with or without fee is hereby granted, provided that the above
@@ -16,150 +16,222 @@
 #include "zix/digest.h"
 
-#ifdef __SSE4_2__
-#  include <smmintrin.h>
-#endif
-
 #include <assert.h>
 #include <stdint.h>
+#include <string.h>
 
-#ifdef __SSE4_2__
-
-// SSE 4.2 CRC32
+#if defined(__clang__) && __clang_major__ >= 12
+#  define FALLTHROUGH() __attribute__((fallthrough))
+#elif defined(__GNUC__) && !defined(__clang__)
+#  define FALLTHROUGH() __attribute__((fallthrough))
+#else
+#  define FALLTHROUGH()
+#endif
 
-uint32_t
-zix_digest_start(void)
-{
-  return 1;
-}
+/*
+  64-bit hash: Essentially fasthash64, implemented here in an aligned/padded
+  and a general UB-free variant.
+*/
 
 static inline uint64_t
-load64(const void* const mem)
+mix64(uint64_t h)
 {
-  uint64_t r = 0u;
-  memcpy(&r, mem, sizeof(r));
-  return r;
+  h ^= h >> 23u;
+  h *= 0x2127599BF4325C37ull;
+  h ^= h >> 47u;
+  return h;
 }
 
-static inline uint32_t
-load32(const void* const mem)
+uint64_t
+zix_digest64(const uint64_t seed, const void* const key, const size_t len)
 {
-  uint32_t r = 0u;
-  memcpy(&r, mem, sizeof(r));
-  return r;
-}
+  static const uint64_t m = 0x880355F21E6D1965ull;
 
-static inline uint16_t
-load16(const void* const mem)
-{
-  uint16_t r = 0u;
-  memcpy(&r, mem, sizeof(r));
-  return r;
-}
+  const size_t         n_blocks   = len / sizeof(uint64_t);
+  const uint8_t* const data       = (const uint8_t*)key;
+  const uint8_t* const blocks_end = data + (n_blocks * sizeof(uint64_t));
 
-uint32_t
-zix_digest_add(uint32_t hash, const void* const buf, const size_t len)
-{
-  const uint8_t* str = (const uint8_t*)buf;
+  uint64_t h = seed ^ (len * m);
+  for (size_t i = 0u; i < n_blocks; ++i) {
+    uint64_t k = 0u;
+    memcpy(&k, data + (i * sizeof(uint64_t)), sizeof(uint64_t));
 
-#  ifdef __x86_64__
-  for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) {
-    hash = (uint32_t)_mm_crc32_u64(hash, load64(str));
-    str += sizeof(uint64_t);
-  }
-  if (len & sizeof(uint32_t)) {
-    hash = _mm_crc32_u32(hash, load32(str));
-    str += sizeof(uint32_t);
-  }
-#  else
-  for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) {
-    hash = _mm_crc32_u32(hash, load32(str));
-    str += sizeof(uint32_t);
+    h ^= mix64(k);
+    h *= m;
   }
-#  endif
-  if (len & sizeof(uint16_t)) {
-    hash = _mm_crc32_u16(hash, load16(str));
-    str += sizeof(uint16_t);
-  }
-  if (len & sizeof(uint8_t)) {
-    hash = _mm_crc32_u8(hash, *str);
+
+  const uint8_t* const tail = blocks_end;
+  uint64_t             v    = 0u;
+
+  switch (len & 7u) {
+  case 7:
+    v |= (uint64_t)tail[6] << 48u;
+    FALLTHROUGH();
+  case 6:
+    v |= (uint64_t)tail[5] << 40u;
+    FALLTHROUGH();
+  case 5:
+    v |= (uint64_t)tail[4] << 32u;
+    FALLTHROUGH();
+  case 4:
+    v |= (uint64_t)tail[3] << 24u;
+    FALLTHROUGH();
+  case 3:
+    v |= (uint64_t)tail[2] << 16u;
+    FALLTHROUGH();
+  case 2:
+    v |= (uint64_t)tail[1] << 8u;
+    FALLTHROUGH();
+  case 1:
+    v |= (uint64_t)tail[0];
+
+    h ^= mix64(v);
+    h *= m;
   }
 
-  return hash;
+  return mix64(h);
 }
 
-uint32_t
-zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len)
+uint64_t
+zix_digest64_aligned(const uint64_t seed, const void* const key, size_t len)
 {
-  assert((uintptr_t)buf % sizeof(uint64_t) == 0);
-  assert(len % sizeof(uint64_t) == 0);
+  static const uint64_t m = 0x880355F21E6D1965ull;
 
-#  ifdef __x86_64__
-  const uint64_t* ptr = (const uint64_t*)buf;
+  assert((uintptr_t)key % sizeof(uint64_t) == 0u);
+  assert(len % sizeof(uint64_t) == 0u);
 
-  for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) {
-    hash = (uint32_t)_mm_crc32_u64(hash, *ptr);
-    ++ptr;
-  }
-
-  return hash;
-#  else
-  const uint32_t* ptr = (const uint32_t*)buf;
+  const uint64_t* const blocks   = (const uint64_t*)key;
+  const size_t          n_blocks = len / sizeof(uint64_t);
+  uint64_t              h        = seed ^ (len * m);
 
-  for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) {
-    hash = _mm_crc32_u32(hash, *ptr);
-    ++ptr;
+  for (size_t i = 0u; i < n_blocks; ++i) {
+    h ^= mix64(blocks[i]);
+    h *= m;
   }
 
-  return hash;
-#  endif
+  return mix64(h);
 }
 
-uint32_t
-zix_digest_add_ptr(const uint32_t hash, const void* const ptr)
-{
-#  ifdef __x86_64__
-  return (uint32_t)_mm_crc32_u64(hash, (uintptr_t)ptr);
-#  else
-  return _mm_crc32_u32(hash, (uintptr_t)ptr);
-#  endif
-}
+/*
+  32-bit hash: Essentially murmur3, reimplemented here in an aligned/padded and
+  a general UB-free variant.
+*/
 
-#else
+/**
+   Rotate left by some count of bits.
 
-// Classic DJB hash
+   This is recognized by any halfway decent compiler and compiled to a single
+   instruction on architectures that have one.
+*/
+static inline uint32_t
+rotl32(const uint32_t val, const uint32_t bits)
+{
+  return ((val << bits) | (val >> (32 - bits)));
+}
 
-uint32_t
-zix_digest_start(void)
+static inline uint32_t
+mix32(uint32_t h)
 {
-  return 5381;
+  h ^= h >> 16u;
+  h *= 0x85EBCA6Bu;
+  h ^= h >> 13u;
+  h *= 0xC2B2AE35u;
+  h ^= h >> 16u;
+  return h;
 }
 
 uint32_t
-zix_digest_add(uint32_t hash, const void* const buf, const size_t len)
+zix_digest32(const uint32_t seed, const void* const key, const size_t len)
 {
-  const uint8_t* str = (const uint8_t*)buf;
+  static const uint32_t c1 = 0xCC9E2D51u;
+  static const uint32_t c2 = 0x1B873593u;
+
+  // Process as many 32-bit blocks as possible
+  const size_t         n_blocks   = len / sizeof(uint32_t);
+  const uint8_t*       data       = (const uint8_t*)key;
+  const uint8_t* const blocks_end = data + (n_blocks * sizeof(uint32_t));
+  uint32_t             h          = seed;
+  for (; data != blocks_end; data += sizeof(uint32_t)) {
+    uint32_t k = 0u;
+    memcpy(&k, data, sizeof(uint32_t));
+
+    k *= c1;
+    k = rotl32(k, 15);
+    k *= c2;
+
+    h ^= k;
+    h = rotl32(h, 13);
+    h = h * 5u + 0xE6546B64u;
+  }
 
-  for (size_t i = 0; i < len; ++i) {
-    hash = (hash << 5u) + hash + str[i];
+  // Process any trailing bytes
+  uint32_t k = 0u;
+  switch (len & 3u) {
+  case 3u:
+    k ^= (uint32_t)data[2u] << 16u;
+    FALLTHROUGH();
+  case 2u:
+    k ^= (uint32_t)data[1u] << 8u;
+    FALLTHROUGH();
+  case 1u:
+    k ^= (uint32_t)data[0u];
+
+    k *= c1;
+    k = rotl32(k, 15u);
+    k *= c2;
+    h ^= k;
   }
 
-  return hash;
+  return mix32(h ^ (uint32_t)len);
 }
 
 uint32_t
-zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len)
+zix_digest32_aligned(const uint32_t seed,
+                     const void* const key,
+                     const size_t len)
 {
-  assert((uintptr_t)buf % sizeof(uint64_t) == 0);
-  assert(len % sizeof(uint64_t) == 0);
+  static const uint32_t c1 = 0xCC9E2D51u;
+  static const uint32_t c2 = 0x1B873593u;
 
-  return zix_digest_add(hash, buf, len);
+  assert((uintptr_t)key % sizeof(uint32_t) == 0u);
+  assert(len % sizeof(uint32_t) == 0u);
+
+  const uint32_t* const blocks   = (const uint32_t*)key;
+  const size_t          n_blocks = len / sizeof(uint32_t);
+  uint32_t              h        = seed;
+  for (size_t i = 0u; i < n_blocks; ++i) {
+    uint32_t k = blocks[i];
+
+    k *= c1;
+    k = rotl32(k, 15);
+    k *= c2;
+
+    h ^= k;
+    h = rotl32(h, 13);
+    h = h * 5u + 0xE6546B64u;
+  }
+
+  return mix32(h ^ (uint32_t)len);
 }
 
-uint32_t
-zix_digest_add_ptr(const uint32_t hash, const void* const ptr)
+// Native word size wrapper
+
+size_t
+zix_digest(const size_t seed, const void* const buf, const size_t len)
 {
-  return zix_digest_add(hash, &ptr, sizeof(ptr));
+#if UINTPTR_MAX >= UINT64_MAX
+  return zix_digest64(seed, buf, len);
+#else
+  return zix_digest32(seed, buf, len);
+#endif
 }
 
+size_t
+zix_digest_aligned(const size_t seed, const void* const buf, const size_t len)
+{
+#if UINTPTR_MAX >= UINT64_MAX
+  return zix_digest64_aligned(seed, buf, len);
+#else
+  return zix_digest32_aligned(seed, buf, len);
 #endif
+}
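For reference, a minimal usage sketch of the API introduced above. This is not part of the commit; it assumes only that zix/digest.h declares the functions as defined in this diff, and the input string and zero seed are arbitrary choices. The assertion holds because, on block-aligned input whose length is a multiple of the block size, the general variant reads the same blocks as the aligned variant and the trailing-byte switch contributes nothing.

#include "zix/digest.h"

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
  static const char* const text = "hello, digest";

  // Equal seeds and equal bytes always produce equal digests
  const size_t   len = strlen(text);
  const uint64_t h64 = zix_digest64(0u, text, len);
  const uint32_t h32 = zix_digest32(0u, text, len);
  const size_t   h   = zix_digest(0u, text, len); // Native word size

  // Aligned variants require aligned data and a block-multiple length, and
  // agree with the general variants on such input (the tail is empty)
  const uint64_t blocks[2] = {1u, 2u};
  assert(zix_digest64_aligned(0u, blocks, sizeof(blocks)) ==
         zix_digest64(0u, blocks, sizeof(blocks)));

  printf("digest64: %016" PRIx64 "\n", h64);
  printf("digest32: %08" PRIx32 "\n", h32);
  printf("digest:   %zx\n", h);
  return 0;
}

Note the design of the wrapper: zix_digest selects the 64-bit hash whenever pointers are at least 64 bits wide (via UINTPTR_MAX), so callers that key hash tables on size_t get a well-distributed full-width digest on any platform.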