Diffstat (limited to 'src')
-rw-r--r--  src/digest.c  272
1 file changed, 172 insertions(+), 100 deletions(-)
diff --git a/src/digest.c b/src/digest.c
index c14a3f5..32cae63 100644
--- a/src/digest.c
+++ b/src/digest.c
@@ -1,5 +1,5 @@
/*
- Copyright 2012-2020 David Robillard <d@drobilla.net>
+ Copyright 2012-2021 David Robillard <d@drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -16,150 +16,222 @@
#include "zix/digest.h"
-#ifdef __SSE4_2__
-# include <smmintrin.h>
-#endif
-
#include <assert.h>
#include <stdint.h>
+#include <string.h>
-#ifdef __SSE4_2__
-
-// SSE 4.2 CRC32
+#if defined(__clang__) && __clang_major__ >= 12
+# define FALLTHROUGH() __attribute__((fallthrough))
+#elif defined(__GNUC__) && !defined(__clang__)
+# define FALLTHROUGH() __attribute__((fallthrough))
+#else
+# define FALLTHROUGH()
+#endif
-uint32_t
-zix_digest_start(void)
-{
- return 1;
-}
+/*
+ 64-bit hash: Essentially fasthash64, implemented here in an aligned/padded
+ variant and a general UB-free variant.
+*/
static inline uint64_t
-load64(const void* const mem)
+mix64(uint64_t h)
{
- uint64_t r = 0u;
- memcpy(&r, mem, sizeof(r));
- return r;
+ h ^= h >> 23u;
+ h *= 0x2127599BF4325C37ull;
+ h ^= h >> 47u;
+ return h;
}
-static inline uint32_t
-load32(const void* const mem)
+uint64_t
+zix_digest64(const uint64_t seed, const void* const key, const size_t len)
{
- uint32_t r = 0u;
- memcpy(&r, mem, sizeof(r));
- return r;
-}
+ static const uint64_t m = 0x880355F21E6D1965ull;
-static inline uint16_t
-load16(const void* const mem)
-{
- uint16_t r = 0u;
- memcpy(&r, mem, sizeof(r));
- return r;
-}
+ const size_t n_blocks = len / sizeof(uint64_t);
+ const uint8_t* const data = (const uint8_t*)key;
+ const uint8_t* const blocks_end = data + (n_blocks * sizeof(uint64_t));
-uint32_t
-zix_digest_add(uint32_t hash, const void* const buf, const size_t len)
-{
- const uint8_t* str = (const uint8_t*)buf;
+ uint64_t h = seed ^ (len * m);
+ for (size_t i = 0u; i < n_blocks; ++i) {
+ uint64_t k = 0u;
+ memcpy(&k, data, sizeof(uint64_t));
-# ifdef __x86_64__
- for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) {
- hash = (uint32_t)_mm_crc32_u64(hash, load64(str));
- str += sizeof(uint64_t);
- }
- if (len & sizeof(uint32_t)) {
- hash = _mm_crc32_u32(hash, load32(str));
- str += sizeof(uint32_t);
- }
-# else
- for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) {
- hash = _mm_crc32_u32(hash, load32(str));
- str += sizeof(uint32_t);
+ h ^= mix64(k);
+ h *= m;
}
-# endif
- if (len & sizeof(uint16_t)) {
- hash = _mm_crc32_u16(hash, load16(str));
- str += sizeof(uint16_t);
- }
- if (len & sizeof(uint8_t)) {
- hash = _mm_crc32_u8(hash, *str);
+
+ const uint8_t* const tail = blocks_end;
+ uint64_t v = 0u;
+
+ switch (len & 7u) {
+ case 7:
+ v |= (uint64_t)tail[6] << 48u;
+ FALLTHROUGH();
+ case 6:
+ v |= (uint64_t)tail[5] << 40u;
+ FALLTHROUGH();
+ case 5:
+ v |= (uint64_t)tail[4] << 32u;
+ FALLTHROUGH();
+ case 4:
+ v |= (uint64_t)tail[3] << 24u;
+ FALLTHROUGH();
+ case 3:
+ v |= (uint64_t)tail[2] << 16u;
+ FALLTHROUGH();
+ case 2:
+ v |= (uint64_t)tail[1] << 8u;
+ FALLTHROUGH();
+ case 1:
+ v |= (uint64_t)tail[0];
+
+ h ^= mix64(v);
+ h *= m;
}
- return hash;
+ return mix64(h);
}
-uint32_t
-zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len)
+uint64_t
+zix_digest64_aligned(const uint64_t seed, const void* const key, size_t len)
{
- assert((uintptr_t)buf % sizeof(uint64_t) == 0);
- assert(len % sizeof(uint64_t) == 0);
+ static const uint64_t m = 0x880355F21E6D1965ull;
-# ifdef __x86_64__
- const uint64_t* ptr = (const uint64_t*)buf;
+ assert((uintptr_t)key % sizeof(uint64_t) == 0u);
+ assert(len % sizeof(uint64_t) == 0u);
- for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) {
- hash = (uint32_t)_mm_crc32_u64(hash, *ptr);
- ++ptr;
- }
-
- return hash;
-# else
- const uint32_t* ptr = (const uint32_t*)buf;
+ const uint64_t* const blocks = (const uint64_t*)key;
+ const size_t n_blocks = len / sizeof(uint64_t);
+ uint64_t h = seed ^ (len * m);
- for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) {
- hash = _mm_crc32_u32(hash, *ptr);
- ++ptr;
+ for (size_t i = 0u; i < n_blocks; ++i) {
+ h ^= mix64(blocks[i]);
+ h *= m;
}
- return hash;
-# endif
+ return mix64(h);
}
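(A minimal usage sketch for the new 64-bit entry points, for readers of this
diff. It assumes the matching declarations land in zix/digest.h, which lies
outside this diff's 'src' filter; it is not part of the commit itself.)

// Usage sketch only, not part of this commit. Assumes zix/digest.h declares
// zix_digest64() and zix_digest64_aligned() as defined above.
#include "zix/digest.h"

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
  const char* const msg = "hello, zix";

  // General variant: any alignment and length, blocks loaded via memcpy()
  const uint64_t h1 = zix_digest64(0u, msg, strlen(msg));

  // Aligned variant: requires 8-byte alignment and a multiple-of-8 length,
  // so blocks can be read directly as whole words
  const uint64_t words[2] = {1u, 2u};
  const uint64_t h2 = zix_digest64_aligned(0u, words, sizeof(words));

  printf("%016" PRIx64 " %016" PRIx64 "\n", h1, h2);
  return 0;
}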
-uint32_t
-zix_digest_add_ptr(const uint32_t hash, const void* const ptr)
-{
-# ifdef __x86_64__
- return (uint32_t)_mm_crc32_u64(hash, (uintptr_t)ptr);
-# else
- return _mm_crc32_u32(hash, (uintptr_t)ptr);
-# endif
-}
+/*
+ 32-bit hash: Essentially murmur3, reimplemented here in an aligned/padded
+ variant and a general UB-free variant.
+*/
-#else
+/**
+ Rotate left by some count of bits.
-// Classic DJB hash
+ This is recognized by any halfway decent compiler and compiled to a single
+ instruction on architectures that have one.
+*/
+static inline uint32_t
+rotl32(const uint32_t val, const uint32_t bits)
+{
+ return ((val << bits) | (val >> (32 - bits)));
+}
-uint32_t
-zix_digest_start(void)
+static inline uint32_t
+mix32(uint32_t h)
{
- return 5381;
+ h ^= h >> 16u;
+ h *= 0x85EBCA6Bu;
+ h ^= h >> 13u;
+ h *= 0xC2B2AE35u;
+ h ^= h >> 16u;
+ return h;
}
uint32_t
-zix_digest_add(uint32_t hash, const void* const buf, const size_t len)
+zix_digest32(const uint32_t seed, const void* const key, const size_t len)
{
- const uint8_t* str = (const uint8_t*)buf;
+ static const uint32_t c1 = 0xCC9E2D51u;
+ static const uint32_t c2 = 0x1B873593u;
+
+ // Process as many 32-bit blocks as possible
+ const size_t n_blocks = len / sizeof(uint32_t);
+ const uint8_t* data = (const uint8_t*)key;
+ const uint8_t* const blocks_end = data + (n_blocks * sizeof(uint32_t));
+ uint32_t h = seed;
+ for (; data != blocks_end; data += sizeof(uint32_t)) {
+ uint32_t k = 0u;
+ memcpy(&k, data, sizeof(uint32_t));
+
+ k *= c1;
+ k = rotl32(k, 15);
+ k *= c2;
+
+ h ^= k;
+ h = rotl32(h, 13);
+ h = h * 5u + 0xE6546B64u;
+ }
- for (size_t i = 0; i < len; ++i) {
- hash = (hash << 5u) + hash + str[i];
+ // Process any trailing bytes
+ uint32_t k = 0u;
+ switch (len & 3u) {
+ case 3u:
+ k ^= (uint32_t)data[2u] << 16u;
+ FALLTHROUGH();
+ case 2u:
+ k ^= (uint32_t)data[1u] << 8u;
+ FALLTHROUGH();
+ case 1u:
+ k ^= (uint32_t)data[0u];
+
+ k *= c1;
+ k = rotl32(k, 15u);
+ k *= c2;
+ h ^= k;
}
- return hash;
+ return mix32(h ^ (uint32_t)len);
}
uint32_t
-zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len)
+zix_digest32_aligned(const uint32_t seed,
+ const void* const key,
+ const size_t len)
{
- assert((uintptr_t)buf % sizeof(uint64_t) == 0);
- assert(len % sizeof(uint64_t) == 0);
+ static const uint32_t c1 = 0xCC9E2D51u;
+ static const uint32_t c2 = 0x1B873593u;
- return zix_digest_add(hash, buf, len);
+ assert((uintptr_t)key % sizeof(uint32_t) == 0u);
+ assert(len % sizeof(uint32_t) == 0u);
+
+ const uint32_t* const blocks = (const uint32_t*)key;
+ const size_t n_blocks = len / sizeof(uint32_t);
+ uint32_t h = seed;
+ for (size_t i = 0u; i < n_blocks; ++i) {
+ uint32_t k = blocks[i];
+
+ k *= c1;
+ k = rotl32(k, 15);
+ k *= c2;
+
+ h ^= k;
+ h = rotl32(h, 13);
+ h = h * 5u + 0xE6546B64u;
+ }
+
+ return mix32(h ^ (uint32_t)len);
}
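(Similarly, a small sketch, not part of the commit: on input that already
satisfies the aligned preconditions, the general variant's tail switch is
skipped and its memcpy() loads read the same words, so the two 32-bit
variants agree. Again assumes the zix/digest.h declarations.)

// Sketch only, not part of this commit
#include "zix/digest.h"

#include <assert.h>
#include <stdint.h>

int
main(void)
{
  const uint32_t blocks[4] = {1u, 2u, 3u, 4u}; // Aligned, len % 4u == 0u

  const uint32_t general = zix_digest32(0u, blocks, sizeof(blocks));
  const uint32_t aligned = zix_digest32_aligned(0u, blocks, sizeof(blocks));

  assert(general == aligned);
  return 0;
}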
-uint32_t
-zix_digest_add_ptr(const uint32_t hash, const void* const ptr)
+// Native word size wrappers
+
+size_t
+zix_digest(const size_t seed, const void* const buf, const size_t len)
{
- return zix_digest_add(hash, &ptr, sizeof(ptr));
+#if UINTPTR_MAX >= UINT64_MAX
+ return zix_digest64(seed, buf, len);
+#else
+ return zix_digest32(seed, buf, len);
+#endif
}
+size_t
+zix_digest_aligned(const size_t seed, const void* const buf, const size_t len)
+{
+#if UINTPTR_MAX >= UINT64_MAX
+ return zix_digest64_aligned(seed, buf, len);
+#else
+ return zix_digest32_aligned(seed, buf, len);
#endif
+}
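(The removed zix_digest_add_ptr() has no direct successor in this diff. Under
the new word-sized API, a caller can hash a pointer's bytes the same way the
old DJB fallback did; the helper name below is illustrative, not part of the
zix API.)

// Illustrative replacement for the removed zix_digest_add_ptr(); hash_ptr()
// is a hypothetical helper, not a zix function.
#include "zix/digest.h"

#include <stddef.h>

static size_t
hash_ptr(const void* const ptr)
{
  return zix_digest(0u, &ptr, sizeof(ptr));
}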