summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--benchmark/dict_bench.c82
-rw-r--r--include/.clang-tidy1
-rw-r--r--include/zix/hash.h277
-rw-r--r--meson.build2
-rw-r--r--src/hash.c403
-rw-r--r--test/.clang-tidy1
-rw-r--r--test/hash_test.c314
7 files changed, 776 insertions, 304 deletions
diff --git a/benchmark/dict_bench.c b/benchmark/dict_bench.c
index 952aae3..62c1283 100644
--- a/benchmark/dict_bench.c
+++ b/benchmark/dict_bench.c
@@ -1,5 +1,5 @@
/*
- Copyright 2011-2020 David Robillard <d@drobilla.net>
+ Copyright 2011-2021 David Robillard <d@drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -25,7 +25,6 @@ ZIX_DISABLE_GLIB_WARNINGS
#include <glib.h>
ZIX_RESTORE_WARNINGS
-#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
@@ -35,7 +34,7 @@ ZIX_RESTORE_WARNINGS
#include <time.h>
typedef struct {
- void* buf;
+ char* buf;
size_t len;
} ZixChunk;
@@ -49,10 +48,16 @@ lcg64(const uint64_t i)
return (a * i) + c;
}
-static uint32_t
+ZIX_PURE_FUNC static const void*
+identity(const void* record)
+{
+ return record;
+}
+
+static size_t
zix_chunk_hash(const ZixChunk* const chunk)
{
- return zix_digest32(0u, chunk->buf, chunk->len);
+ return zix_digest(0u, chunk->buf, chunk->len);
}
static bool
@@ -88,27 +93,27 @@ main(int argc, char** argv)
return test_fail("Failed to open file %s\n", file);
}
- size_t max_n_strings = 100000000;
+ size_t max_n_strings = 1u << 20u;
/* Read input strings */
- char** strings = NULL;
- size_t* lengths = NULL;
- size_t n_strings = 0;
- char* buf = (char*)calloc(1, 1);
- size_t buf_len = 1;
- size_t this_str_len = 0;
+ ZixChunk* chunks = NULL;
+ size_t n_chunks = 0;
+ char* buf = (char*)calloc(1, 1);
+ size_t buf_len = 1;
+ size_t this_str_len = 0;
for (int c = 0; (c = fgetc(fd)) != EOF;) {
- if (isspace(c)) {
+ if (c == '\n') {
if (this_str_len == 0) {
continue;
}
- strings = (char**)realloc(strings, (n_strings + 1) * sizeof(char*));
- lengths = (size_t*)realloc(lengths, (n_strings + 1) * sizeof(size_t));
- strings[n_strings] = (char*)malloc(buf_len);
- lengths[n_strings] = this_str_len;
- memcpy(strings[n_strings], buf, buf_len);
+
+ chunks = (ZixChunk*)realloc(chunks, (n_chunks + 1) * sizeof(ZixChunk));
+
+ chunks[n_chunks].buf = (char*)malloc(buf_len);
+ chunks[n_chunks].len = this_str_len;
+ memcpy(chunks[n_chunks].buf, buf, buf_len);
this_str_len = 0;
- if (++n_strings == max_n_strings) {
+ if (++n_chunks == max_n_strings) {
break;
}
} else {
@@ -129,12 +134,13 @@ main(int argc, char** argv)
fprintf(insert_dat, "# n\tGHashTable\tZixHash\n");
fprintf(search_dat, "# n\tGHashTable\tZixHash\n");
- for (size_t n = n_strings / 16; n <= n_strings; n *= 2) {
+ for (size_t n = n_chunks / 16; n <= n_chunks; n *= 2) {
printf("Benchmarking n = %zu\n", n);
- GHashTable* hash = g_hash_table_new(g_str_hash, g_str_equal);
- ZixHash* zhash = zix_hash_new((ZixHashFunc)zix_chunk_hash,
- (ZixEqualFunc)zix_chunk_equal,
- sizeof(ZixChunk));
+ GHashTable* hash = g_hash_table_new(g_str_hash, g_str_equal);
+
+ ZixHash* zhash = zix_hash_new(
+ identity, (ZixHashFunc)zix_chunk_hash, (ZixEqualFunc)zix_chunk_equal);
+
fprintf(insert_dat, "%zu", n);
fprintf(search_dat, "%zu", n);
@@ -143,17 +149,16 @@ main(int argc, char** argv)
// GHashTable
struct timespec insert_start = bench_start();
for (size_t i = 0; i < n; ++i) {
- g_hash_table_insert(hash, strings[i], strings[i]);
+ g_hash_table_insert(hash, chunks[i].buf, chunks[i].buf);
}
fprintf(insert_dat, "\t%lf", bench_end(&insert_start));
// ZixHash
insert_start = bench_start();
for (size_t i = 0; i < n; ++i) {
- const ZixChunk chunk = {strings[i], lengths[i] + 1};
- ZixStatus st = zix_hash_insert(zhash, &chunk, NULL);
+ ZixStatus st = zix_hash_insert(zhash, &chunks[i]);
if (st && st != ZIX_STATUS_EXISTS) {
- return test_fail("Failed to insert `%s'\n", strings[i]);
+ return test_fail("Failed to insert `%s'\n", (const char*)chunks[i].buf);
}
}
fprintf(insert_dat, "\t%lf\n", bench_end(&insert_start));
@@ -164,9 +169,10 @@ main(int argc, char** argv)
struct timespec search_start = bench_start();
for (size_t i = 0; i < n; ++i) {
const size_t index = (size_t)(lcg64(seed + i) % n);
- char* match = (char*)g_hash_table_lookup(hash, strings[index]);
- if (!!strcmp(match, strings[index])) {
- return test_fail("Bad match for `%s'\n", strings[index]);
+ char* match = (char*)g_hash_table_lookup(hash, chunks[index].buf);
+ if (!!strcmp(match, chunks[index].buf)) {
+ return test_fail(
+ "GHashTable: Bad match `%s' for `%s'\n", match, chunks[index].buf);
}
}
fprintf(search_dat, "\t%lf", bench_end(&search_start));
@@ -175,17 +181,15 @@ main(int argc, char** argv)
search_start = bench_start();
for (size_t i = 0; i < n; ++i) {
const size_t index = (size_t)(lcg64(seed + i) % n);
- const ZixChunk key = {strings[index], lengths[index] + 1};
const ZixChunk* match = NULL;
- if (!(match = (const ZixChunk*)zix_hash_find(zhash, &key))) {
- return test_fail("Hash: Failed to find `%s'\n", strings[index]);
+ if (!(match =
+ (const ZixChunk*)zix_hash_find_record(zhash, &chunks[index]))) {
+ return test_fail("Hash: Failed to find `%s'\n", chunks[index].buf);
}
- if (!!strcmp((const char*)match->buf, strings[index])) {
- return test_fail("Hash: Bad match %p for `%s': `%s'\n",
- (const void*)match,
- strings[index],
- (const char*)match->buf);
+ if (!!strcmp(match->buf, chunks[index].buf)) {
+ return test_fail(
+ "ZixHash: Bad match `%s' for `%s'\n", match->buf, chunks[index].buf);
}
}
fprintf(search_dat, "\t%lf\n", bench_end(&search_start));
diff --git a/include/.clang-tidy b/include/.clang-tidy
index fec15b8..fc5b3c5 100644
--- a/include/.clang-tidy
+++ b/include/.clang-tidy
@@ -5,6 +5,7 @@ Checks: >
-clang-diagnostic-unused-function,
-clang-diagnostic-unused-macros,
-hicpp-multiway-paths-covered,
+ -hicpp-uppercase-literal-suffix,
-llvmlibc-*,
WarningsAsErrors: '*'
HeaderFilterRegex: '.*'
diff --git a/include/zix/hash.h b/include/zix/hash.h
index 6312d7b..f08d267 100644
--- a/include/zix/hash.h
+++ b/include/zix/hash.h
@@ -1,5 +1,5 @@
/*
- Copyright 2011-2020 David Robillard <d@drobilla.net>
+ Copyright 2011-2021 David Robillard <d@drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -19,8 +19,8 @@
#include "zix/common.h"
+#include <stdbool.h>
#include <stddef.h>
-#include <stdint.h>
#ifdef __cplusplus
extern "C" {
@@ -33,90 +33,289 @@ extern "C" {
@{
*/
+// ZIX_HASH_KEY_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_KEY_TYPE)
+typedef ZIX_HASH_KEY_TYPE ZixHashKey;
+#else
+typedef void ZixHashKey;
+#endif
+
+// ZIX_HASH_RECORD_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_RECORD_TYPE)
+typedef ZIX_HASH_RECORD_TYPE ZixHashRecord;
+#else
+typedef void ZixHashRecord;
+#endif
+
+// ZIX_HASH_SEARCH_DATA_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_SEARCH_DATA_TYPE)
+typedef ZIX_HASH_SEARCH_DATA_TYPE ZixHashSearchData;
+#else
+typedef void ZixHashSearchData;
+#endif
+
+/**
+ A hash table.
+
+ This is an open addressing hash table that stores pointers to arbitrary user
+ data. Internally, everything is stored in a single flat array that is
+ resized when necessary.
+
+ The single user-provided pointer that is stored in the table is called a
+ "record". A record contains a "key", which is accessed via a user-provided
+ function. This design supports storing large records (which may be
+ expensive to allocate/construct) without requiring an entire record to
+ search. Simple atomic values can be stored by providing a trivial identity
+ function as a key function.
+
+ The table uses power of 2 sizes with a growth factor of 2, so that hash
+ values can be folded into an array index using bitwise AND as a fast modulo.
+ This means that only the necessary low bits of the hash value will be used,
+ so the hash function must be well-balanced within this range. More or less
+ any good modern hash algorithm will be fine, but beware, for example, hash
+ functions that assume they are targeting a table with a prime size.
+
+ Since this doubles and halves in size, it may not be an optimal choice if
+ memory reuse is a priority. A growth factor of 1.5 with fast range
+ reduction may be a better choice there, at the cost of requiring 128-bit
+ arithmetic on 64-bit platforms, and indexing operations being slightly more
+ expensive.
+*/
typedef struct ZixHashImpl ZixHash;
-/// Function for computing the hash of an element
-typedef uint32_t (*ZixHashFunc)(const void* value);
+/// A full hash code for a key which is not folded down to the table size
+typedef size_t ZixHashCode;
+
+/**
+ An iterator to an entry in a hash table.
+
+ This is really just an index, but should be considered opaque to the user
+ and only used via the provided API and equality comparison.
+*/
+typedef size_t ZixHashIter;
+
+/// User function for computing the hash of a key
+typedef ZixHashCode (*ZixHashFunc)(const ZixHashKey* key);
+
+/// User function for determining if two keys are truly equal
+typedef bool (*ZixKeyEqualFunc)(const ZixHashKey* a, const ZixHashKey* b);
-/// Function to visit a hash element
-typedef void (*ZixHashVisitFunc)(void* value, void* user_data);
+/// User function for determining if a key matches in a custom search
+typedef bool (*ZixKeyMatchFunc)(const ZixHashKey* key,
+ const ZixHashSearchData* user_data);
+
+/// User function for getting the key of a record
+typedef const ZixHashKey* (*ZixKeyFunc)(const ZixHashRecord* record);
/**
- Create a new hash table.
+ A "plan" (position) to insert a record in a hash table.
- To minimize space overhead, unlike many hash tables this stores a single
- value, not a key and a value. Any size of value can be stored, but all the
- values in the hash table must be the same size, and the values must be safe
- to copy with memcpy. To get key:value behaviour, simply insert a struct
- with a key and value into the hash.
+ This contains everything necessary to insert a record, except the record
+ itself. It is exposed to split up insertion so that records only need to be
+ allocated if an existing record is not found, but the contents should be
+ considered opaque to the user.
+*/
+typedef struct {
+ ZixHashCode code; ///< Hash code used to find this position
+ ZixHashIter index; ///< Index into hash table
+} ZixHashInsertPlan;
- @param hash_func The hashing function.
- @param equal_func A function to test value equality.
- @param value_size The size of the values to be stored.
+/**
+ Create a new hash table.
+
+ @param key_func A function to retrieve the key from a record.
+ @param hash_func The key hashing function.
+ @param equal_func A function to test keys for equality.
*/
ZIX_API
ZixHash*
-zix_hash_new(ZixHashFunc hash_func, ZixEqualFunc equal_func, size_t value_size);
+zix_hash_new(ZixKeyFunc key_func,
+ ZixHashFunc hash_func,
+ ZixKeyEqualFunc equal_func);
/// Free `hash`
ZIX_API
void
zix_hash_free(ZixHash* hash);
-/// Return the number of elements in `hash`
+/// Return an iterator to the first record in a hash, or the end if it is empty
+ZIX_PURE_API
+ZixHashIter
+zix_hash_begin(const ZixHash* hash);
+
+/// Return an iterator one past the last possible record in a hash
+ZIX_PURE_API
+ZixHashIter
+zix_hash_end(const ZixHash* hash);
+
+/// Return the record pointed to by an iterator
+ZIX_PURE_API
+ZixHashRecord*
+zix_hash_get(const ZixHash* hash, ZixHashIter i);
+
+/// Return an iterator that has been advanced to the next record in a hash
+ZIX_PURE_API
+ZixHashIter
+zix_hash_next(const ZixHash* hash, ZixHashIter i);
+
+/// Return the number of elements in a hash
ZIX_PURE_API
size_t
zix_hash_size(const ZixHash* hash);
/**
- Insert an item into `hash`.
+ Find the best position to insert a record with the given key.
+
+ This searches the hash table and returns either the position of an existing
+ equivalent record, or the best available position to insert a new one. The
+ difference can be determined with zix_hash_record_at().
+
+ If the returned position is not occupied, then it is valid to insert a
+ record with this key using zix_hash_insert_at() until the hash table is
+ modified (which invalidates the position).
+*/
+ZIX_API
+ZixHashInsertPlan
+zix_hash_plan_insert(const ZixHash* hash, const ZixHashKey* key);
+
+/**
+ Find the best position to insert a record with a custom search.
- If no matching value is found, ZIX_STATUS_SUCCESS will be returned, and @p
- inserted will be pointed to the copy of `value` made in the new hash node.
+ This is an advanced low-level version of zix_hash_plan_insert(). It allows
+ a precalculated hash code to be given, along with a custom search predicate.
+ These must be compatible with the functions that manage the hash table:
+ trivial usage would be to pass the equality function used by the hash and
+ the key to search for.
- If a matching value already exists, ZIX_STATUS_EXISTS will be returned, and
- `inserted` will be pointed to the existing value.
+ This is exposed to make it possible to avoid constructing a key at all when
+ potentially inserting. For example, if keys are structures with a fixed
+ header and a variably-sized body, and you have a separate header and body
+ this can be used to find an insert position without having to allocate a
+ contiguous key.
+
+ Note that care must be taken when using this function: improper use can
+ corrupt the hash table. The hash code given must be correct for the key to
+ be inserted, and the predicate must return true only if the key it is called
+ with (the first argument) matches the key to be inserted.
+*/
+ZIX_API
+ZixHashInsertPlan
+zix_hash_plan_insert_prehashed(const ZixHash* hash,
+ ZixHashCode code,
+ ZixKeyMatchFunc predicate,
+ const ZixHashSearchData* user_data);
+
+/**
+ Return the record at the given position, or null.
+
+ This is used to determine if a position returned from zix_hash_plan_insert()
+ can be used to insert a new record, or to access the existing matching
+ record.
+*/
+ZIX_PURE_API
+ZixHashRecord*
+zix_hash_record_at(const ZixHash* hash, ZixHashInsertPlan position);
+
+/**
+ Insert a record at a specific position.
+
+ The position must have been found with an earlier call to
+ zix_hash_plan_insert(), and no modifications must have been made to the hash
+ table since.
+
+ @param hash The hash table.
+
+ @param position The position for the new record.
+
+ @param record The record to insert which, on success, can now be considered
+ owned by the hash table.
+
+ @return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS if a record already exists at
+ this position, or ZIX_STATUS_NO_MEM if growing the hash table failed.
+*/
+ZIX_API
+ZixStatus
+zix_hash_insert_at(ZixHash* hash,
+ ZixHashInsertPlan position,
+ ZixHashRecord* record);
+
+/**
+ Insert a record.
+
+ This is a trivial wrapper for zix_hash_plan_insert() and
+ zix_hash_insert_at() that is more convenient when record construction is not
+ expensive.
@param hash The hash table.
- @param value The value to be inserted.
- @param inserted The copy of `value` in the hash table.
+
+ @param record The record to insert which, on success, can now be considered
+ owned by the hash table.
+
@return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS, or ZIX_STATUS_NO_MEM.
*/
ZIX_API
ZixStatus
-zix_hash_insert(ZixHash* hash, const void* value, void** inserted);
+zix_hash_insert(ZixHash* hash, ZixHashRecord* record);
/**
- Remove an item from `hash`.
+ Erase a record at a specific position.
+
+ @param hash The hash table to remove the record from.
+
+ @param i Iterator to the record to remove. This must be a valid iterator
+ from an earlier call to zix_hash_find(), that is, the hash table must not
+ have been modified since.
+
+ @param removed Set to the removed record, or null.
+
+ @return ZIX_STATUS_SUCCES or ZIX_STATUS_BAD_ARG if `i` does not point at a
+ removable record.
+*/
+ZIX_API
+ZixStatus
+zix_hash_erase(ZixHash* hash, ZixHashIter i, ZixHashRecord** removed);
+
+/**
+ Remove a record.
@param hash The hash table.
- @param value The value to remove.
+ @param key The key of the record to remove.
+ @param removed Set to the removed record, or null.
@return ZIX_STATUS_SUCCES or ZIX_STATUS_NOT_FOUND.
*/
ZIX_API
ZixStatus
-zix_hash_remove(ZixHash* hash, const void* value);
+zix_hash_remove(ZixHash* hash, const ZixHashKey* key, ZixHashRecord** removed);
/**
- Search for an item in `hash`.
+ Find the position of a record with a given key.
- @param hash The hash table.
- @param value The value to search for.
+ @param hash The hash table to search.
+
+ @param key The key of the desired record.
+
+ @return An iterator to the matching record, or the end iterator if no such
+ record exists.
*/
ZIX_API
-void*
-zix_hash_find(const ZixHash* hash, const void* value);
+ZixHashIter
+zix_hash_find(const ZixHash* hash, const ZixHashKey* key);
/**
- Call `f` on each value in `hash`.
+ Find a record with a given key.
- @param hash The hash table.
- @param f The function to call on each value.
- @param user_data The user_data parameter passed to `f`.
+ This is essentially the same as zix_hash_find(), but returns a pointer to
+ the record for convenience.
+
+ @param hash The hash table to search.
+
+ @param key The key of the desired record.
+
+ @return A pointer to the matching record, of null if no such record exists.
*/
ZIX_API
-void
-zix_hash_foreach(ZixHash* hash, ZixHashVisitFunc f, void* user_data);
+ZixHashRecord*
+zix_hash_find_record(const ZixHash* hash, const ZixHashKey* key);
/**
@}
diff --git a/meson.build b/meson.build
index cdde084..7b8e872 100644
--- a/meson.build
+++ b/meson.build
@@ -34,7 +34,7 @@ if get_option('strict')
elif cc.get_id() == 'gcc'
c_warnings = [
# '-Waggregate-return',
- '-Walloc-size-larger-than=4096',
+ '-Walloc-size-larger-than=16384',
'-Walloc-zero',
'-Walloca',
'-Wanalyzer-too-complex',
diff --git a/src/hash.c b/src/hash.c
index d9556ed..5bc9d60 100644
--- a/src/hash.c
+++ b/src/hash.c
@@ -1,5 +1,5 @@
/*
- Copyright 2011-2020 David Robillard <d@drobilla.net>
+ Copyright 2011-2021 David Robillard <d@drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -17,214 +17,327 @@
#include "zix/hash.h"
#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
#include <stdlib.h>
-#include <string.h>
-
-/**
- Primes, each slightly less than twice its predecessor, and as far away
- from powers of two as possible.
-*/
-static const unsigned sizes[] = {
- 53, 97, 193, 389, 769, 1543, 3079,
- 6151, 12289, 24593, 49157, 98317, 196613, 393241,
- 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653,
- 100663319, 201326611, 402653189, 805306457, 1610612741, 0};
typedef struct ZixHashEntry {
- struct ZixHashEntry* next; ///< Next entry in bucket
- uint32_t hash; ///< Non-modulo hash value
- // Value follows here (access with zix_hash_value)
+ ZixHashCode hash; ///< Non-folded hash value
+ ZixHashRecord* value; ///< Pointer to user-owned record
} ZixHashEntry;
struct ZixHashImpl {
- ZixHashFunc hash_func;
- ZixEqualFunc equal_func;
- ZixHashEntry** buckets;
- const unsigned* n_buckets;
- size_t value_size;
- unsigned count;
+ ZixKeyFunc key_func; ///< User-provided key accessor
+ ZixHashFunc hash_func; ///< User-provided hashing function
+ ZixKeyEqualFunc equal_func; ///< User-provided equality comparison function
+ size_t count; ///< Number of records stored in the table
+ size_t mask; ///< Bit mask for fast modulo (n_entries - 1)
+ size_t n_entries; ///< Power of two table size
+ ZixHashEntry* entries; ///< Pointer to dynamically allocated table
};
-static inline void*
-zix_hash_value(ZixHashEntry* entry)
-{
- return entry + 1;
-}
+static const size_t min_n_entries = 4u;
+static const size_t tombstone = 0xDEADu;
ZixHash*
-zix_hash_new(ZixHashFunc hash_func, ZixEqualFunc equal_func, size_t value_size)
+zix_hash_new(const ZixKeyFunc key_func,
+ const ZixHashFunc hash_func,
+ const ZixKeyEqualFunc equal_func)
{
- ZixHash* hash = (ZixHash*)malloc(sizeof(ZixHash));
- if (hash) {
- hash->hash_func = hash_func;
- hash->equal_func = equal_func;
- hash->n_buckets = &sizes[0];
- hash->value_size = value_size;
- hash->count = 0;
- if (!(hash->buckets =
- (ZixHashEntry**)calloc(*hash->n_buckets, sizeof(ZixHashEntry*)))) {
- free(hash);
- return NULL;
- }
+ ZixHash* const hash = (ZixHash*)malloc(sizeof(ZixHash));
+ if (!hash) {
+ return NULL;
}
+
+ hash->key_func = key_func;
+ hash->hash_func = hash_func;
+ hash->equal_func = equal_func;
+ hash->count = 0u;
+ hash->n_entries = min_n_entries;
+ hash->mask = hash->n_entries - 1u;
+
+ hash->entries = (ZixHashEntry*)calloc(hash->n_entries, sizeof(ZixHashEntry));
+ if (!hash->entries) {
+ free(hash);
+ return NULL;
+ }
+
return hash;
}
void
-zix_hash_free(ZixHash* hash)
+zix_hash_free(ZixHash* const hash)
{
- if (!hash) {
- return;
+ if (hash) {
+ free(hash->entries);
+ free(hash);
}
+}
- for (unsigned b = 0; b < *hash->n_buckets; ++b) {
- ZixHashEntry* bucket = hash->buckets[b];
- for (ZixHashEntry* e = bucket; e;) {
- ZixHashEntry* next = e->next;
- free(e);
- e = next;
- }
- }
+ZixHashIter
+zix_hash_begin(const ZixHash* const hash)
+{
+ return hash->entries[0u].value ? 0u : zix_hash_next(hash, 0u);
+}
+
+ZixHashIter
+zix_hash_end(const ZixHash* const hash)
+{
+ return hash->n_entries;
+}
+
+ZixHashRecord*
+zix_hash_get(const ZixHash* hash, const ZixHashIter i)
+{
+ assert(i < hash->n_entries);
- free(hash->buckets);
- free(hash);
+ return hash->entries[i].value;
+}
+
+ZixHashIter
+zix_hash_next(const ZixHash* const hash, ZixHashIter i)
+{
+ do {
+ ++i;
+ } while (i < hash->n_entries && !hash->entries[i].value);
+
+ return i;
}
size_t
-zix_hash_size(const ZixHash* hash)
+zix_hash_size(const ZixHash* const hash)
{
return hash->count;
}
-static inline void
-insert_entry(ZixHashEntry** bucket, ZixHashEntry* entry)
+static inline size_t
+fold_hash(const ZixHashCode h_nomod, const size_t mask)
+{
+ return h_nomod & mask;
+}
+
+static inline bool
+is_empty(const ZixHashEntry* const entry)
+{
+ return !entry->value && !entry->hash;
+}
+
+static inline bool
+is_match(const ZixHash* const hash,
+ const ZixHashCode code,
+ const size_t entry_index,
+ ZixKeyEqualFunc predicate,
+ const void* const user_data)
+{
+ const ZixHashEntry* const entry = &hash->entries[entry_index];
+
+ return entry->value && entry->hash == code &&
+ predicate(hash->key_func(entry->value), user_data);
+}
+
+static inline size_t
+next_index(const ZixHash* const hash, const size_t i)
{
- entry->next = *bucket;
- *bucket = entry;
+ return (i == hash->mask) ? 0u : (i + 1u);
}
-static inline ZixStatus
-rehash(ZixHash* hash, unsigned new_n_buckets)
+static inline ZixHashIter
+find_entry(const ZixHash* const hash,
+ const ZixHashKey* const key,
+ const size_t h,
+ const ZixHashCode code)
{
- ZixHashEntry** new_buckets =
- (ZixHashEntry**)calloc(new_n_buckets, sizeof(ZixHashEntry*));
- if (!new_buckets) {
+ size_t i = h;
+
+ while (!is_match(hash, code, i, hash->equal_func, key) &&
+ !is_empty(&hash->entries[i])) {
+ i = next_index(hash, i);
+ }
+
+ return i;
+}
+
+static ZixStatus
+rehash(ZixHash* const hash, const size_t old_n_entries)
+{
+ ZixHashEntry* const old_entries = hash->entries;
+ const size_t new_n_entries = hash->n_entries;
+
+ // Replace the array in the hash first so we can use find_entry() normally
+ hash->entries = (ZixHashEntry*)calloc(new_n_entries, sizeof(ZixHashEntry));
+ if (!hash->entries) {
return ZIX_STATUS_NO_MEM;
}
- const unsigned old_n_buckets = *hash->n_buckets;
- for (unsigned b = 0; b < old_n_buckets; ++b) {
- for (ZixHashEntry* e = hash->buckets[b]; e;) {
- ZixHashEntry* const next = e->next;
- const unsigned h = e->hash % new_n_buckets;
- insert_entry(&new_buckets[h], e);
- e = next;
+ // Reinsert every element into the new array
+ for (size_t i = 0u; i < old_n_entries; ++i) {
+ ZixHashEntry* const entry = &old_entries[i];
+
+ if (entry->value) {
+ assert(hash->mask == hash->n_entries - 1u);
+ const size_t new_h = fold_hash(entry->hash, hash->mask);
+ const size_t new_i = find_entry(hash, entry->value, new_h, entry->hash);
+
+ hash->entries[new_i] = *entry;
}
}
- free(hash->buckets);
- hash->buckets = new_buckets;
+ free(old_entries);
+ return ZIX_STATUS_SUCCESS;
+}
+
+static ZixStatus
+grow(ZixHash* const hash)
+{
+ const size_t old_n_entries = hash->n_entries;
+
+ hash->n_entries <<= 1u;
+ hash->mask = hash->n_entries - 1u;
+
+ return rehash(hash, old_n_entries);
+}
+
+static ZixStatus
+shrink(ZixHash* const hash)
+{
+ if (hash->n_entries > min_n_entries) {
+ const size_t old_n_entries = hash->n_entries;
+
+ hash->n_entries >>= 1u;
+ hash->mask = hash->n_entries - 1u;
+
+ return rehash(hash, old_n_entries);
+ }
return ZIX_STATUS_SUCCESS;
}
-static inline ZixHashEntry*
-find_entry(const ZixHash* hash,
- const void* key,
- const unsigned h,
- const unsigned h_nomod)
+ZixHashIter
+zix_hash_find(const ZixHash* const hash, const ZixHashKey* const key)
+{
+ const ZixHashCode h_nomod = hash->hash_func(key);
+ const size_t h = fold_hash(h_nomod, hash->mask);
+ const ZixHashIter i = find_entry(hash, key, h, h_nomod);
+
+ return is_empty(&hash->entries[i]) ? hash->n_entries : i;
+}
+
+ZixHashRecord*
+zix_hash_find_record(const ZixHash* const hash, const ZixHashKey* const key)
{
- for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) {
- if (e->hash == h_nomod && hash->equal_func(zix_hash_value(e), key)) {
- return e;
+ const ZixHashCode h_nomod = hash->hash_func(key);
+ const size_t h = fold_hash(h_nomod, hash->mask);
+
+ return hash->entries[find_entry(hash, key, h, h_nomod)].value;
+}
+
+ZixHashInsertPlan
+zix_hash_plan_insert_prehashed(const ZixHash* const hash,
+ const ZixHashCode code,
+ const ZixKeyMatchFunc predicate,
+ const void* const user_data)
+{
+ // Calculate an ideal initial position
+ ZixHashInsertPlan pos = {code, fold_hash(code, hash->mask)};
+
+ // Search for a free position starting at the ideal one
+ const size_t start_index = pos.index;
+ size_t first_tombstone = SIZE_MAX;
+ while (!is_empty(&hash->entries[pos.index])) {
+ if (is_match(hash, code, pos.index, predicate, user_data)) {
+ return pos;
+ }
+
+ if (first_tombstone == SIZE_MAX && !hash->entries[pos.index].value) {
+ assert(hash->entries[pos.index].hash == tombstone);
+ first_tombstone = pos.index; // Remember the first/best free index
+ }
+
+ pos.index = next_index(hash, pos.index);
+ if (pos.index == start_index) {
+ break; // Rare edge case: entire table is full of entries/tombstones
}
}
- return NULL;
+
+ // If we found a tombstone before an empty slot, place the new element there
+ pos.index = first_tombstone < SIZE_MAX ? first_tombstone : pos.index;
+ assert(!hash->entries[pos.index].value);
+
+ return pos;
}
-void*
-zix_hash_find(const ZixHash* hash, const void* value)
+ZixHashInsertPlan
+zix_hash_plan_insert(const ZixHash* const hash, const ZixHashKey* const key)
{
- const unsigned h_nomod = hash->hash_func(value);
- const unsigned h = h_nomod % *hash->n_buckets;
- ZixHashEntry* const entry = find_entry(hash, value, h, h_nomod);
- return entry ? zix_hash_value(entry) : 0;
+ return zix_hash_plan_insert_prehashed(
+ hash, hash->hash_func(key), hash->equal_func, key);
}
-ZixStatus
-zix_hash_insert(ZixHash* hash, const void* value, void** inserted)
+ZixHashRecord*
+zix_hash_record_at(const ZixHash* const hash, const ZixHashInsertPlan position)
{
- unsigned h_nomod = hash->hash_func(value);
- unsigned h = h_nomod % *hash->n_buckets;
+ return hash->entries[position.index].value;
+}
- ZixHashEntry* elem = find_entry(hash, value, h, h_nomod);
- if (elem) {
- assert(elem->hash == h_nomod);
- if (inserted) {
- *inserted = zix_hash_value(elem);
- }
+ZixStatus
+zix_hash_insert_at(ZixHash* const hash,
+ const ZixHashInsertPlan position,
+ ZixHashRecord* const record)
+{
+ if (hash->entries[position.index].value) {
return ZIX_STATUS_EXISTS;
}
- elem = (ZixHashEntry*)malloc(sizeof(ZixHashEntry) + hash->value_size);
- if (!elem) {
- return ZIX_STATUS_NO_MEM;
- }
- elem->next = NULL;
- elem->hash = h_nomod;
- memcpy(elem + 1, value, hash->value_size);
-
- const unsigned next_n_buckets = *(hash->n_buckets + 1);
- if (next_n_buckets != 0 && (hash->count + 1) >= next_n_buckets) {
- if (!rehash(hash, next_n_buckets)) {
- h = h_nomod % *(++hash->n_buckets);
- }
- }
+ // Set entry to new value
+ ZixHashEntry* const entry = &hash->entries[position.index];
+ assert(!entry->value);
+ entry->hash = position.code;
+ entry->value = record;
- insert_entry(&hash->buckets[h], elem);
- ++hash->count;
- if (inserted) {
- *inserted = zix_hash_value(elem);
+ // Update size and rehash if we exceeded the maximum load
+ const size_t max_load = hash->n_entries / 2u + hash->n_entries / 8u;
+ if (++hash->count >= max_load) {
+ return grow(hash);
}
+
return ZIX_STATUS_SUCCESS;
}
ZixStatus
-zix_hash_remove(ZixHash* hash, const void* value)
-{
- const unsigned h_nomod = hash->hash_func(value);
- const unsigned h = h_nomod % *hash->n_buckets;
-
- ZixHashEntry** next_ptr = &hash->buckets[h];
- for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) {
- if (h_nomod == e->hash && hash->equal_func(zix_hash_value(e), value)) {
- *next_ptr = e->next;
- free(e);
- --hash->count;
- return ZIX_STATUS_SUCCESS;
- }
- next_ptr = &e->next;
- }
+zix_hash_insert(ZixHash* const hash, ZixHashRecord* const record)
+{
+ const ZixHashKey* const key = hash->key_func(record);
+ const ZixHashInsertPlan position = zix_hash_plan_insert(hash, key);
- if (hash->n_buckets != sizes) {
- const unsigned prev_n_buckets = *(hash->n_buckets - 1);
- if (hash->count - 1 <= prev_n_buckets) {
- if (!rehash(hash, prev_n_buckets)) {
- --hash->n_buckets;
- }
- }
+ return zix_hash_insert_at(hash, position, record);
+}
+
+ZixStatus
+zix_hash_erase(ZixHash* const hash,
+ const ZixHashIter i,
+ ZixHashRecord** const removed)
+{
+ // Replace entry with a tombstone
+ *removed = hash->entries[i].value;
+ hash->entries[i].hash = tombstone;
+ hash->entries[i].value = NULL;
+
+ // Decrease element count and rehash if necessary
+ --hash->count;
+ if (hash->count < hash->n_entries / 4u) {
+ return shrink(hash);
}
- return ZIX_STATUS_NOT_FOUND;
+ return ZIX_STATUS_SUCCESS;
}
-void
-zix_hash_foreach(ZixHash* hash, ZixHashVisitFunc f, void* user_data)
+ZixStatus
+zix_hash_remove(ZixHash* const hash,
+ const ZixHashKey* const key,
+ ZixHashRecord** const removed)
{
- for (unsigned b = 0; b < *hash->n_buckets; ++b) {
- ZixHashEntry* bucket = hash->buckets[b];
- for (ZixHashEntry* e = bucket; e; e = e->next) {
- f(zix_hash_value(e), user_data);
- }
- }
+ const ZixHashIter i = zix_hash_find(hash, key);
+
+ return i == hash->n_entries ? ZIX_STATUS_NOT_FOUND
+ : zix_hash_erase(hash, i, removed);
}
diff --git a/test/.clang-tidy b/test/.clang-tidy
index b8084d5..0139c99 100644
--- a/test/.clang-tidy
+++ b/test/.clang-tidy
@@ -11,6 +11,7 @@ Checks: >
-cppcoreguidelines-avoid-non-const-global-variables,
-google-readability-casting,
-hicpp-multiway-paths-covered,
+ -hicpp-uppercase-literal-suffix,
-llvm-header-guard,
-llvmlibc-*,
-misc-no-recursion,
diff --git a/test/hash_test.c b/test/hash_test.c
index d82b76a..68556b0 100644
--- a/test/hash_test.c
+++ b/test/hash_test.c
@@ -14,37 +14,29 @@
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#undef NDEBUG
+
+#define ZIX_HASH_KEY_TYPE const char
+#define ZIX_HASH_RECORD_TYPE const char
+
+#include "test_data.h"
#include "test_malloc.h"
#include "zix/common.h"
#include "zix/digest.h"
#include "zix/hash.h"
+#include <assert.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
static bool expect_failure = false;
-static const char* strings[] = {
- "one", "two", "three", "four", "five", "six", "seven", "eight",
- "2one", "2two", "2three", "2four", "2five", "2six", "2seven", "2eight",
- "3one", "3two", "3three", "3four", "3five", "3six", "3seven", "3eight",
- "4one", "4two", "4three", "4four", "4five", "4six", "4seven", "4eight",
- "5one", "5two", "5three", "5four", "5five", "5six", "5seven", "5eight",
- "6one", "6two", "6three", "6four", "6five", "6six", "6seven", "6eight",
- "7one", "7two", "7three", "7four", "7five", "7six", "7seven", "7eight",
- "8one", "8two", "8three", "8four", "8five", "8six", "8seven", "8eight",
- "9one", "9two", "9three", "9four", "9five", "9six", "9seven", "9eight",
- "Aone", "Atwo", "Athree", "Afour", "Afive", "Asix", "Aseven", "Aeight",
- "Bone", "Btwo", "Bthree", "Bfour", "Bfive", "Bsix", "Bseven", "Beight",
- "Cone", "Ctwo", "Cthree", "Cfour", "Cfive", "Csix", "Cseven", "Ceight",
- "Done", "Dtwo", "Dthree", "Dfour", "Dfive", "Dsix", "Dseven", "Deight",
-};
-
ZIX_LOG_FUNC(1, 2)
static int
test_fail(const char* fmt, ...)
@@ -60,161 +52,323 @@ test_fail(const char* fmt, ...)
return 1;
}
-static unsigned n_checked = 0;
+ZIX_PURE_FUNC static const char*
+identity(const char* record)
+{
+ return record;
+}
+
+/// Decent hash function using zix_digest (murmur2)
+ZIX_PURE_FUNC static size_t
+decent_string_hash(const char* const str)
+{
+ return zix_digest(0u, str, strlen(str));
+}
-static void
-check(void* value, void* ZIX_UNUSED(user_data))
+/// Terrible hash function from K&R first edition
+ZIX_PURE_FUNC static size_t
+terrible_string_hash(const char* str)
{
- if (strlen(*(const char* const*)value) >= 3) {
- ++n_checked;
- } else {
- fprintf(stderr, "ERROR: %s\n", (const char*)value);
+ size_t hash = 0u;
+ uint8_t c = 0u;
+
+ while ((c = (uint8_t)*str++)) {
+ hash += c;
}
+
+ return hash;
}
-static uint32_t
-string_ptr_hash(const void* value)
+ZIX_PURE_FUNC static size_t
+string_hash_aligned(const char* const str)
{
- const char* const str = *(const char* const*)value;
+ size_t length = strlen(str);
- return zix_digest32(0u, str, strlen(str));
+ length = (length + (sizeof(size_t) - 1)) / sizeof(size_t) * sizeof(size_t);
+ return zix_digest_aligned(0u, str, length);
+}
+
+ZIX_PURE_FUNC static size_t
+string_hash32(const char* const str)
+{
+ return (size_t)zix_digest32(0u, str, strlen(str));
}
+ZIX_PURE_FUNC static size_t
+string_hash64(const char* const str)
+{
+ return (size_t)zix_digest64(0u, str, strlen(str));
+}
+
+ZIX_PURE_FUNC static size_t
+string_hash32_aligned(const char* const str)
+{
+ size_t length = strlen(str);
+
+ length = (length + 3u) / 4u * 4u;
+ return (size_t)zix_digest32_aligned(0u, str, length);
+}
+
+#if UINTPTR_MAX >= UINT64_MAX
+
+ZIX_PURE_FUNC static size_t
+string_hash64_aligned(const char* const str)
+{
+ size_t length = strlen(str);
+
+ length = (length + 7u) / 8u * 8u;
+ return (size_t)zix_digest64_aligned(0u, str, length);
+}
+
+#endif
+
static bool
-string_ptr_equal(const void* a, const void* b)
+string_equal(const char* const a, const char* const b)
{
- return !strcmp(*(const char* const*)a, *(const char* const*)b);
+ return !strcmp(a, b);
}
static int
-stress(void)
+stress_with(const ZixHashFunc hash_func, const size_t n_elems)
{
- ZixHash* hash =
- zix_hash_new(string_ptr_hash, string_ptr_equal, sizeof(const char*));
+ ZixHash* hash = zix_hash_new(identity, hash_func, string_equal);
if (!hash) {
return test_fail("Failed to allocate hash\n");
}
- const size_t n_strings = sizeof(strings) / sizeof(const char*);
+ static const size_t string_length = 15;
+
+ char* const buffer = (char*)calloc(1, n_elems * (string_length + 1));
+ char** const strings = (char**)calloc(sizeof(char*), n_elems);
+ if (!buffer || !strings) {
+ free(buffer);
+ free(strings);
+ return test_fail("Failed to allocate strings\n");
+ }
+
+ uint32_t seed = 1u;
+ for (size_t i = 0u; i < n_elems; ++i) {
+ strings[i] = buffer + i * (string_length + 1);
+ assert((uintptr_t)strings[i] % sizeof(size_t) == 0);
+ assert((uintptr_t)strings[i] % sizeof(uint32_t) == 0);
+
+ for (size_t j = 0u; j < string_length; ++j) {
+ seed = lcg32(seed);
+ strings[i][j] = (char)('!' + (seed % 92));
+ }
+ }
// Insert each string
- for (size_t i = 0; i < n_strings; ++i) {
- void* inserted = NULL;
- ZixStatus st = zix_hash_insert(hash, &strings[i], &inserted);
+ for (size_t i = 0; i < n_elems; ++i) {
+ ZixStatus st = zix_hash_insert(hash, strings[i]);
if (st) {
return test_fail("Failed to insert `%s'\n", strings[i]);
}
-
- if (*(const void* const*)inserted != strings[i]) {
- return test_fail("Corrupt insertion %s != %s\n",
- strings[i],
- *(const char* const*)inserted);
- }
}
// Ensure hash size is correct
- if (zix_hash_size(hash) != n_strings) {
- return test_fail("Hash size %" PRIuPTR " != %" PRIuPTR "\n",
- zix_hash_size(hash),
- n_strings);
+ if (zix_hash_size(hash) != n_elems) {
+ return test_fail(
+ "Hash size %" PRIuPTR " != %" PRIuPTR "\n", zix_hash_size(hash), n_elems);
}
- // zix_hash_print_dot(hash, stdout);
-
// Attempt to insert each string again
- for (size_t i = 0; i < n_strings; ++i) {
- void* inserted = NULL;
- ZixStatus st = zix_hash_insert(hash, &strings[i], &inserted);
+ for (size_t i = 0; i < n_elems; ++i) {
+ ZixStatus st = zix_hash_insert(hash, strings[i]);
if (st != ZIX_STATUS_EXISTS) {
- return test_fail("Double inserted `%s'\n", strings[i]);
+ return test_fail(
+ "Double inserted `%s' (%s)\n", strings[i], zix_strerror(st));
}
}
// Search for each string
- for (size_t i = 0; i < n_strings; ++i) {
- const char* const* match =
- (const char* const*)zix_hash_find(hash, &strings[i]);
+ for (size_t i = 0; i < n_elems; ++i) {
+ const char* match = (const char*)zix_hash_find_record(hash, strings[i]);
if (!match) {
return test_fail("Failed to find `%s'\n", strings[i]);
}
- if (*match != strings[i]) {
- return test_fail("Bad match for `%s': `%s'\n", strings[i], *match);
+ if (match != strings[i]) {
+ return test_fail("Bad match for `%s': `%s'\n", strings[i], match);
}
}
- // Try some false matches
- const char* not_indexed[] = {"ftp://example.org/not-there-at-all",
- "http://example.org/foobar",
- "http://",
- "http://otherdomain.com"};
- const size_t n_not_indexed = sizeof(not_indexed) / sizeof(char*);
- for (size_t i = 0; i < n_not_indexed; ++i) {
- const char* const* match =
- (const char* const*)zix_hash_find(hash, &not_indexed[i]);
+ static const char* const not_indexed_string = "__not__indexed__";
+
+ // Do a quick smoke test to ensure that false matches don't succeed
+ char* const not_indexed = (char*)calloc(1, strlen(not_indexed_string) + 1);
+ if (not_indexed) {
+ memcpy(not_indexed, not_indexed_string, strlen(not_indexed_string) + 1);
+ const char* match = (const char*)zix_hash_find_record(hash, not_indexed);
if (match) {
- return test_fail("Unexpectedly found `%s'\n", not_indexed[i]);
+ return test_fail("Unexpectedly found `%s'\n", not_indexed);
}
+ free(not_indexed);
}
// Remove strings
- for (size_t i = 0; i < n_strings; ++i) {
+ for (size_t i = 0; i < n_elems; ++i) {
const size_t initial_size = zix_hash_size(hash);
// Remove string
- ZixStatus st = zix_hash_remove(hash, &strings[i]);
+ const char* removed = NULL;
+ ZixStatus st = zix_hash_remove(hash, strings[i], &removed);
if (st) {
return test_fail("Failed to remove `%s'\n", strings[i]);
}
+ // Ensure the removed value is what we expected
+ if (removed != strings[i]) {
+ return test_fail("Removed `%s` instead of `%s`\n", removed, strings[i]);
+ }
+
// Ensure size is updated
if (zix_hash_size(hash) != initial_size - 1) {
return test_fail("Removing node did not decrease hash size\n");
}
// Ensure second removal fails
- st = zix_hash_remove(hash, &strings[i]);
+ st = zix_hash_remove(hash, strings[i], &removed);
if (st != ZIX_STATUS_NOT_FOUND) {
return test_fail("Unexpectedly removed `%s' twice\n", strings[i]);
}
+ // Ensure value can no longer be found
+ assert(zix_hash_find(hash, strings[i]) == zix_hash_end(hash));
+
// Check to ensure remaining strings are still present
- for (size_t j = i + 1; j < n_strings; ++j) {
- const char* const* match =
- (const char* const*)zix_hash_find(hash, &strings[j]);
+ for (size_t j = i + 1; j < n_elems; ++j) {
+ const char* match = (const char*)zix_hash_find_record(hash, strings[j]);
if (!match) {
return test_fail("Failed to find `%s' after remove\n", strings[j]);
}
- if (*match != strings[j]) {
+ if (match != strings[j]) {
return test_fail("Bad match for `%s' after remove\n", strings[j]);
}
}
}
// Insert each string again (to check non-empty destruction)
- for (size_t i = 0; i < n_strings; ++i) {
- ZixStatus st = zix_hash_insert(hash, &strings[i], NULL);
+ for (size_t i = 0; i < n_elems; ++i) {
+ ZixHashInsertPlan plan = zix_hash_plan_insert(hash, strings[i]);
+
+ assert(!zix_hash_record_at(hash, plan));
+ ZixStatus st = zix_hash_insert_at(hash, plan, strings[i]);
if (st) {
return test_fail("Failed to insert `%s'\n", strings[i]);
}
}
// Check key == value (and test zix_hash_foreach)
- zix_hash_foreach(hash, check, NULL);
- if (n_checked != n_strings) {
+ size_t n_checked = 0u;
+ for (ZixHashIter i = zix_hash_begin(hash); i != zix_hash_end(hash);
+ i = zix_hash_next(hash, i)) {
+ const char* const string = (const char*)zix_hash_get(hash, i);
+ assert(string);
+ if (strlen(string) < 3) {
+ return test_fail("Corrupt value `%s'\n", string);
+ }
+
+ ++n_checked;
+ }
+ if (n_checked != n_elems) {
return test_fail("Check failed\n");
}
+ free(strings);
+ free(buffer);
zix_hash_free(hash);
return 0;
}
+static int
+stress(const size_t n_elems)
+{
+ if (stress_with(decent_string_hash, n_elems) ||
+ stress_with(terrible_string_hash, n_elems / 4) ||
+ stress_with(string_hash_aligned, n_elems / 4) ||
+ stress_with(string_hash32, n_elems / 4) ||
+ stress_with(string_hash64, n_elems / 4) ||
+ stress_with(string_hash32_aligned, n_elems / 4)) {
+ return 1;
+ }
+
+#if UINTPTR_MAX >= UINT64_MAX
+ if (stress_with(string_hash64_aligned, n_elems / 4)) {
+ return 1;
+ }
+#endif
+
+ return 0;
+}
+
+/// Identity hash function for numeric strings for explicitly hitting cases
+ZIX_PURE_FUNC static size_t
+identity_index_hash(const char* const str)
+{
+ return strtoul(str, NULL, 10);
+}
+
+static void
+test_all_tombstones(void)
+{
+ /* This tests an edge case where a minimum-sized table can be entirely full
+ of tombstones. If the search loop is not written carefully, then this can
+ result in a hang. This has been seen to occur in real code, though it's
+ very hard to hit with a decent hash function. So, we use the above
+ degenerate index hashing function to explicitly place elements exactly
+ where we want to hit this case. */
+
+#define N_STRINGS 4
+
+ static const char* original_strings[N_STRINGS] = {
+ "0 a",
+ "1 a",
+ "2 a",
+ "3 a",
+ };
+
+ static const char* collision_strings[N_STRINGS] = {
+ "0 b",
+ "1 b",
+ "2 b",
+ "3 b",
+ };
+
+ ZixStatus st = ZIX_STATUS_SUCCESS;
+ ZixHash* hash = zix_hash_new(identity, identity_index_hash, string_equal);
+
+ // Insert each element then immediately remove it
+ for (unsigned i = 0u; i < N_STRINGS; ++i) {
+ const char* removed = NULL;
+
+ assert(!zix_hash_insert(hash, original_strings[i]));
+ assert(!zix_hash_remove(hash, original_strings[i], &removed));
+ }
+
+ // Now the table should be "empty" but contain tombstones
+ assert(zix_hash_size(hash) == 0);
+
+ // Insert clashing elements which should hit the "all tombstones" case
+ for (unsigned i = 0u; i < N_STRINGS; ++i) {
+ assert(!zix_hash_insert(hash, collision_strings[i]));
+ assert(!st);
+ }
+
+ zix_hash_free(hash);
+
+#undef N_STRINGS
+}
+
int
main(void)
{
zix_hash_free(NULL);
+ test_all_tombstones();
+
+ static const size_t n_elems = 1024u;
- if (stress()) {
+ if (stress(n_elems)) {
return 1;
}
@@ -224,7 +378,7 @@ main(void)
expect_failure = true;
for (size_t i = 0; i < total_n_allocs; ++i) {
test_malloc_reset(i);
- stress();
+ stress(n_elems);
}
test_malloc_reset((size_t)-1);