Diffstat (limited to 'src/zix/digest.c')
-rw-r--r-- | src/zix/digest.c | 250
1 file changed, 171 insertions, 79 deletions
diff --git a/src/zix/digest.c b/src/zix/digest.c
index 47d27b94..dcfadf8f 100644
--- a/src/zix/digest.c
+++ b/src/zix/digest.c
@@ -1,5 +1,5 @@
 /*
-  Copyright 2012-2020 David Robillard <d@drobilla.net>
+  Copyright 2012-2021 David Robillard <d@drobilla.net>
 
   Permission to use, copy, modify, and/or distribute this software for any
   purpose with or without fee is hereby granted, provided that the above
@@ -16,126 +16,218 @@
 #include "zix/digest.h"
 
-#ifdef __SSE4_2__
-#  include <smmintrin.h>
-#endif
-
 #include <assert.h>
 #include <stdint.h>
+#include <string.h>
 
-#ifdef __SSE4_2__
+#if defined(__clang__) && __clang_major__ >= 12
+#  define FALLTHROUGH() __attribute__((fallthrough))
+#elif defined(__GNUC__) && !defined(__clang__)
+#  define FALLTHROUGH() __attribute__((fallthrough))
+#else
+#  define FALLTHROUGH()
+#endif
 
-// SSE 4.2 CRC32
+/*
+  64-bit hash: Essentially fasthash64, implemented here in an aligned/padded
+  and a general UB-free variant.
+*/
 
-uint32_t
-zix_digest_start(void)
+static inline uint64_t
+mix64(uint64_t h)
 {
-  return 1;
+  h ^= h >> 23u;
+  h *= 0x2127599BF4325C37ull;
+  h ^= h >> 47u;
+  return h;
 }
 
-uint32_t
-zix_digest_add(uint32_t hash, const void* const buf, const size_t len)
+uint64_t
+zix_digest64(const uint64_t seed, const void* const key, const size_t len)
 {
-  const uint8_t* str = (const uint8_t*)buf;
+  static const uint64_t m = 0x880355F21E6D1965ull;
 
-#  ifdef __x86_64__
-  for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) {
-    hash = (uint32_t)_mm_crc32_u64(hash, *(const uint64_t*)str);
-    str += sizeof(uint64_t);
-  }
-  if (len & sizeof(uint32_t)) {
-    hash = _mm_crc32_u32(hash, *(const uint32_t*)str);
-    str += sizeof(uint32_t);
-  }
-#  else
-  for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) {
-    hash = _mm_crc32_u32(hash, *(const uint32_t*)str);
-    str += sizeof(uint32_t);
-  }
-#  endif
-  if (len & sizeof(uint16_t)) {
-    hash = _mm_crc32_u16(hash, *(const uint16_t*)str);
-    str += sizeof(uint16_t);
+  const uint64_t* const blocks = (const uint64_t*)key;
+  const size_t n_blocks = len / sizeof(uint64_t);
+
+  uint64_t h = seed ^ (len * m);
+  for (size_t i = 0u; i < n_blocks; ++i) {
+    h ^= mix64(blocks[i]);
+    h *= m;
   }
-  if (len & sizeof(uint8_t)) {
-    hash = _mm_crc32_u8(hash, *(const uint8_t*)str);
+
+  const uint8_t* const tail = (const unsigned char*)(blocks + n_blocks);
+  uint64_t v = 0u;
+
+  switch (len & 7u) {
+  case 7:
+    v |= (uint64_t)tail[6] << 48u;
+    FALLTHROUGH();
+  case 6:
+    v |= (uint64_t)tail[5] << 40u;
+    FALLTHROUGH();
+  case 5:
+    v |= (uint64_t)tail[4] << 32u;
+    FALLTHROUGH();
+  case 4:
+    v |= (uint64_t)tail[3] << 24u;
+    FALLTHROUGH();
+  case 3:
+    v |= (uint64_t)tail[2] << 16u;
+    FALLTHROUGH();
+  case 2:
+    v |= (uint64_t)tail[1] << 8u;
+    FALLTHROUGH();
+  case 1:
+    v |= (uint64_t)tail[0];
+
+    h ^= mix64(v);
+    h *= m;
   }
 
-  return hash;
+  return mix64(h);
 }
 
-uint32_t
-zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len)
+uint64_t
+zix_digest64_aligned(const uint64_t seed, const void* const key, size_t len)
 {
-  assert((uintptr_t)buf % sizeof(uint64_t) == 0);
-  assert(len % sizeof(uint64_t) == 0);
-
-#  ifdef __x86_64__
-  const uint64_t* ptr = (const uint64_t*)buf;
+  static const uint64_t m = 0x880355F21E6D1965ull;
 
-  for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) {
-    hash = (uint32_t)_mm_crc32_u64(hash, *ptr);
-    ++ptr;
-  }
+  assert((uintptr_t)key % sizeof(uint64_t) == 0u);
+  assert(len % sizeof(uint64_t) == 0u);
 
-  return hash;
-#  else
-  const uint32_t* ptr = (const uint32_t*)buf;
+  const uint64_t* const blocks = (const uint64_t*)key;
+  const size_t n_blocks = len / sizeof(uint64_t);
 
+  uint64_t h = seed ^ (len * m);
-  for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) {
-    hash = _mm_crc32_u32(hash, *ptr);
-    ++ptr;
+  for (size_t i = 0u; i < n_blocks; ++i) {
+    h ^= mix64(blocks[i]);
+    h *= m;
   }
 
-  return hash;
-#  endif
+  return mix64(h);
 }
 
-uint32_t
-zix_digest_add_ptr(const uint32_t hash, const void* const ptr)
-{
-#  ifdef __x86_64__
-  return (uint32_t)_mm_crc32_u64(hash, (uintptr_t)ptr);
-#  else
-  return _mm_crc32_u32(hash, (uintptr_t)ptr);
-#  endif
-}
+/*
+  32-bit hash: Essentially murmur3, reimplemented here in an aligned/padded and
+  a general UB-free variant.
+*/
 
-#else
+/**
+  Rotate left by some count of bits.
 
-// Classic DJB hash
+  This is recognized by any halfway decent compiler and compiled to a single
+  instruction on architectures that have one.
+*/
+static inline uint32_t
+rotl32(const uint32_t val, const uint32_t bits)
+{
+  return ((val << bits) | (val >> (32 - bits)));
+}
 
-uint32_t
-zix_digest_start(void)
+static inline uint32_t
+mix32(uint32_t h)
 {
-  return 5381;
+  h ^= h >> 16u;
+  h *= 0x85EBCA6Bu;
+  h ^= h >> 13u;
+  h *= 0xC2B2AE35u;
+  h ^= h >> 16u;
+  return h;
 }
 
 uint32_t
-zix_digest_add(uint32_t hash, const void* const buf, const size_t len)
+zix_digest32(const uint32_t seed, const void* const key, const size_t len)
 {
-  const uint8_t* str = (const uint8_t*)buf;
+  static const uint32_t c1 = 0xCC9E2D51u;
+  static const uint32_t c2 = 0x1B873593u;
+
+  // Process as many 32-bit blocks as possible
+  const size_t n_blocks = len / sizeof(uint32_t);
+  const uint8_t* data = (const uint8_t*)key;
+  const uint8_t* const blocks_end = data + (n_blocks * sizeof(uint32_t));
+  uint32_t h = seed;
+  for (; data != blocks_end; data += sizeof(uint32_t)) {
+    uint32_t k = 0u;
+    memcpy(&k, data, sizeof(uint32_t));
+
+    k *= c1;
+    k = rotl32(k, 15);
+    k *= c2;
+
+    h ^= k;
+    h = rotl32(h, 13);
+    h = h * 5u + 0xE6546B64u;
+  }
 
-  for (size_t i = 0; i < len; ++i) {
-    hash = (hash << 5u) + hash + str[i];
+  // Process any trailing bytes
+  uint32_t k = 0u;
+  switch (len & 3u) {
+  case 3u:
+    k ^= (uint32_t)data[2u] << 16u;
+    FALLTHROUGH();
+  case 2u:
+    k ^= (uint32_t)data[1u] << 8u;
+    FALLTHROUGH();
+  case 1u:
+    k ^= (uint32_t)data[0u];
+
+    k *= c1;
+    k = rotl32(k, 15u);
+    k *= c2;
+    h ^= k;
   }
 
-  return hash;
+  return mix32(h ^ (uint32_t)len);
 }
 
 uint32_t
-zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len)
+zix_digest32_aligned(const uint32_t seed,
+                     const void* const key,
+                     const size_t len)
 {
-  assert((uintptr_t)buf % sizeof(uint64_t) == 0);
-  assert(len % sizeof(uint64_t) == 0);
+  static const uint32_t c1 = 0xCC9E2D51u;
+  static const uint32_t c2 = 0x1B873593u;
+
+  assert((uintptr_t)key % sizeof(uint32_t) == 0u);
+  assert(len % sizeof(uint32_t) == 0u);
+
+  const uint32_t* const blocks = (const uint32_t*)key;
+  const size_t n_blocks = len / sizeof(uint32_t);
+  uint32_t h = seed;
+  for (size_t i = 0u; i < n_blocks; ++i) {
+    uint32_t k = blocks[i];
+
+    k *= c1;
+    k = rotl32(k, 15);
+    k *= c2;
 
-  return zix_digest_add(hash, buf, len);
+    h ^= k;
+    h = rotl32(h, 13);
+    h = h * 5u + 0xE6546B64u;
+  }
+
+  return mix32(h ^ (uint32_t)len);
 }
 
-uint32_t
-zix_digest_add_ptr(const uint32_t hash, const void* const ptr)
+// Native word size wrapper
+
+size_t
+zix_digest(const size_t seed, const void* const buf, const size_t len)
 {
-  return zix_digest_add(hash, &ptr, sizeof(ptr));
+#if UINTPTR_MAX >= UINT64_MAX
+  return zix_digest64(seed, buf, len);
+#else
+  return zix_digest32(seed, buf, len);
+#endif
 }
 
+size_t
+zix_digest_aligned(const size_t seed, const void* const buf, const size_t len)
+{
+#if UINTPTR_MAX >= UINT64_MAX
+  return zix_digest64_aligned(seed, buf, len);
+#else
+  return zix_digest32_aligned(seed, buf, len);
+#endif
+}
-#endif
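
For context, the replacement API hashes arbitrary bytes with an explicit seed. A minimal usage sketch, using only the signatures visible in the diff above (the test scaffolding itself is not part of zix):

#include "zix/digest.h"

#include <inttypes.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
  const char* const s = "hello";

  // Seeds are arbitrary; distinct seeds act as distinct hash functions
  const uint64_t h64 = zix_digest64(0u, s, strlen(s));
  const uint32_t h32 = zix_digest32(0u, s, strlen(s));

  printf("%016" PRIx64 " %08" PRIx32 "\n", h64, h32);
  return 0;
}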
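The fallthrough switch in zix_digest64 builds the 1 to 7 trailing bytes into a zero-padded word without ever reading past the end of the input. A hypothetical equivalent formulation, for illustration only (load_tail is not a zix function):

#include <stddef.h>
#include <stdint.h>

// Hypothetical equivalent of the tail switch in zix_digest64: for n from 1
// to 7 this assembles the same zero-padded value v as the fallthrough cases,
// independent of host byte order, and never reads past tail[n - 1]
static uint64_t
load_tail(const uint8_t* const tail, const size_t n)
{
  uint64_t v = 0u;

  for (size_t i = 0u; i < n; ++i) {
    v |= (uint64_t)tail[i] << (8u * i);
  }

  return v;
}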
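The _aligned variants assert word-aligned input with a length that is a whole number of words; on input meeting that contract they should agree with the general variants, which perform the same block math plus tail handling. A small sanity-check sketch (check_aligned_agreement is our own name, not zix API):

#include "zix/digest.h"

#include <assert.h>
#include <stdint.h>

static void
check_aligned_agreement(void)
{
  // A static array of words is suitably aligned and a whole number of
  // words long, so it satisfies the assertions in the aligned variant
  static const uint32_t words[4] = {1u, 2u, 3u, 4u};

  const uint32_t general = zix_digest32(0u, words, sizeof(words));
  const uint32_t aligned = zix_digest32_aligned(0u, words, sizeof(words));

  assert(general == aligned);
}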
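Finally, zix_digest and zix_digest_aligned pick the widest digest that fits in a machine word, which suits size_t-indexed structures such as hash tables. A sketch of that intended use (bucket_for is a hypothetical helper, not zix API):

#include "zix/digest.h"

#include <stddef.h>
#include <string.h>

// Hypothetical helper: map a string key to one of n_buckets hash table
// buckets with the native word size digest
static size_t
bucket_for(const char* const key, const size_t n_buckets)
{
  return zix_digest(0u, key, strlen(key)) % n_buckets;
}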