From f0c30e3fff0072746252eb9a37334e740975f389 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Wed, 6 Jun 2018 20:35:46 +0200 Subject: Add zix data structures --- src/.clang-tidy | 1 + src/zix/btree.c | 936 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/zix/btree.h | 187 +++++++++++ src/zix/common.h | 138 ++++++++ src/zix/digest.c | 141 +++++++++ src/zix/digest.h | 67 ++++ src/zix/hash.c | 230 ++++++++++++++ src/zix/hash.h | 138 ++++++++ 8 files changed, 1838 insertions(+) create mode 100644 src/zix/btree.c create mode 100644 src/zix/btree.h create mode 100644 src/zix/common.h create mode 100644 src/zix/digest.c create mode 100644 src/zix/digest.h create mode 100644 src/zix/hash.c create mode 100644 src/zix/hash.h (limited to 'src') diff --git a/src/.clang-tidy b/src/.clang-tidy index 5be37470..79f266d9 100644 --- a/src/.clang-tidy +++ b/src/.clang-tidy @@ -9,6 +9,7 @@ Checks: > -cert-dcl51-cpp, -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, -google-readability-todo, + -hicpp-multiway-paths-covered, -hicpp-signed-bitwise, -llvm-header-guard, -llvmlibc-*, diff --git a/src/zix/btree.c b/src/zix/btree.c new file mode 100644 index 00000000..19fee858 --- /dev/null +++ b/src/zix/btree.c @@ -0,0 +1,936 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "zix/btree.h" + +#include +#include +#include +#include + +// #define ZIX_BTREE_DEBUG 1 +// #define ZIX_BTREE_SORTED_CHECK 1 + +#ifndef ZIX_BTREE_PAGE_SIZE +# define ZIX_BTREE_PAGE_SIZE 4096 +#endif + +#define ZIX_BTREE_NODE_SPACE (ZIX_BTREE_PAGE_SIZE - 2 * sizeof(uint16_t)) +#define ZIX_BTREE_LEAF_VALS ((ZIX_BTREE_NODE_SPACE / sizeof(void*)) - 1) +#define ZIX_BTREE_INODE_VALS (ZIX_BTREE_LEAF_VALS / 2) + +struct ZixBTreeImpl { + ZixBTreeNode* root; + ZixDestroyFunc destroy; + ZixComparator cmp; + const void* cmp_data; + size_t size; + unsigned height; ///< Number of levels, i.e. root only has height 1 +}; + +struct ZixBTreeNodeImpl { + uint16_t is_leaf; + uint16_t n_vals; + // On 64-bit we rely on some padding here to get page-sized nodes + union { + struct { + void* vals[ZIX_BTREE_LEAF_VALS]; + } leaf; + + struct { + void* vals[ZIX_BTREE_INODE_VALS]; + ZixBTreeNode* children[ZIX_BTREE_INODE_VALS + 1]; + } inode; + } data; +}; + +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112l) || \ + (defined(__cplusplus) && __cplusplus >= 201103L) +static_assert(sizeof(ZixBTreeNode) == ZIX_BTREE_PAGE_SIZE, ""); +#endif + +typedef struct { + ZixBTreeNode* node; + unsigned index; +} ZixBTreeIterFrame; + +struct ZixBTreeIterImpl { + unsigned n_levels; ///< Maximum depth of stack + unsigned level; ///< Current level in stack + ZixBTreeIterFrame stack[]; ///< Position stack +}; + +#ifdef ZIX_BTREE_DEBUG + +static void +print_node(const ZixBTreeNode* n, const char* prefix) +{ + printf("%s[", prefix); + for (uint16_t v = 0; v < n->n_vals; ++v) { + printf(" %lu", (uintptr_t)n->vals[v]); + } + printf(" ]\n"); +} + +static void +print_tree(const ZixBTreeNode* parent, const ZixBTreeNode* node, int level) +{ + if (node) { + if (!parent) { + printf("TREE {\n"); + } + for (int i = 0; i < level + 1; ++i) { + printf(" "); + } + print_node(node, ""); + if (!node->is_leaf) { + for (uint16_t i = 0; i < node->n_vals + 1; ++i) { + print_tree(node, node->data.inode.children[i], level + 1); + } + } + if (!parent) { + printf("}\n"); + } + } +} + +#endif // ZIX_BTREE_DEBUG + +static ZixBTreeNode* +zix_btree_node_new(const bool leaf) +{ +#if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112l) || \ + (defined(__cplusplus) && __cplusplus >= 201103L)) + assert(sizeof(ZixBTreeNode) == ZIX_BTREE_PAGE_SIZE); +#endif + + ZixBTreeNode* node = (ZixBTreeNode*)malloc(sizeof(ZixBTreeNode)); + if (node) { + node->is_leaf = leaf; + node->n_vals = 0; + } + return node; +} + +static void* +zix_btree_value(const ZixBTreeNode* const node, const unsigned i) +{ + assert(i < node->n_vals); + return node->is_leaf ? node->data.leaf.vals[i] : node->data.inode.vals[i]; +} + +static ZixBTreeNode* +zix_btree_child(const ZixBTreeNode* const node, const unsigned i) +{ + assert(!node->is_leaf); + assert(i <= ZIX_BTREE_INODE_VALS); + return node->data.inode.children[i]; +} + +ZixBTree* +zix_btree_new(const ZixComparator cmp, + const void* const cmp_data, + const ZixDestroyFunc destroy) +{ + ZixBTree* t = (ZixBTree*)malloc(sizeof(ZixBTree)); + if (t) { + t->root = zix_btree_node_new(true); + t->destroy = destroy; + t->cmp = cmp; + t->cmp_data = cmp_data; + t->size = 0; + t->height = 1; + if (!t->root) { + free(t); + return NULL; + } + } + return t; +} + +static void +zix_btree_free_rec(ZixBTree* const t, ZixBTreeNode* const n) +{ + if (n) { + if (n->is_leaf) { + if (t->destroy) { + for (uint16_t i = 0; i < n->n_vals; ++i) { + t->destroy(n->data.leaf.vals[i]); + } + } + } else { + if (t->destroy) { + for (uint16_t i = 0; i < n->n_vals; ++i) { + t->destroy(n->data.inode.vals[i]); + } + } + + for (uint16_t i = 0; i < n->n_vals + 1; ++i) { + zix_btree_free_rec(t, zix_btree_child(n, i)); + } + } + + free(n); + } +} + +void +zix_btree_free(ZixBTree* const t) +{ + if (t) { + zix_btree_free_rec(t, t->root); + free(t); + } +} + +size_t +zix_btree_size(const ZixBTree* const t) +{ + return t->size; +} + +static uint16_t +zix_btree_max_vals(const ZixBTreeNode* const node) +{ + return node->is_leaf ? ZIX_BTREE_LEAF_VALS : ZIX_BTREE_INODE_VALS; +} + +static uint16_t +zix_btree_min_vals(const ZixBTreeNode* const node) +{ + return (uint16_t)(((zix_btree_max_vals(node) + 1U) / 2U) - 1U); +} + +/** Shift pointers in `array` of length `n` right starting at `i`. */ +static void +zix_btree_ainsert(void** const array, + const unsigned n, + const unsigned i, + void* const e) +{ + memmove(array + i + 1, array + i, (n - i) * sizeof(e)); + array[i] = e; +} + +/** Erase element `i` in `array` of length `n` and return erased element. */ +static void* +zix_btree_aerase(void** const array, const unsigned n, const unsigned i) +{ + void* const ret = array[i]; + memmove(array + i, array + i + 1, (n - i) * sizeof(ret)); + return ret; +} + +/** Split lhs, the i'th child of `n`, into two nodes. */ +static ZixBTreeNode* +zix_btree_split_child(ZixBTreeNode* const n, + const unsigned i, + ZixBTreeNode* const lhs) +{ + assert(lhs->n_vals == zix_btree_max_vals(lhs)); + assert(n->n_vals < ZIX_BTREE_INODE_VALS); + assert(i < n->n_vals + 1U); + assert(zix_btree_child(n, i) == lhs); + + const uint16_t max_n_vals = zix_btree_max_vals(lhs); + ZixBTreeNode* rhs = zix_btree_node_new(lhs->is_leaf); + if (!rhs) { + return NULL; + } + + // LHS and RHS get roughly half, less the middle value which moves up + lhs->n_vals = max_n_vals / 2U; + rhs->n_vals = (uint16_t)(max_n_vals - lhs->n_vals - 1); + + if (lhs->is_leaf) { + // Copy large half from LHS to new RHS node + memcpy(rhs->data.leaf.vals, + lhs->data.leaf.vals + lhs->n_vals + 1, + rhs->n_vals * sizeof(void*)); + + // Move middle value up to parent + zix_btree_ainsert( + n->data.inode.vals, n->n_vals, i, lhs->data.leaf.vals[lhs->n_vals]); + } else { + // Copy large half from LHS to new RHS node + memcpy(rhs->data.inode.vals, + lhs->data.inode.vals + lhs->n_vals + 1, + rhs->n_vals * sizeof(void*)); + memcpy(rhs->data.inode.children, + lhs->data.inode.children + lhs->n_vals + 1, + (rhs->n_vals + 1U) * sizeof(ZixBTreeNode*)); + + // Move middle value up to parent + zix_btree_ainsert( + n->data.inode.vals, n->n_vals, i, lhs->data.inode.vals[lhs->n_vals]); + } + + // Insert new RHS node in parent at position i + zix_btree_ainsert((void**)n->data.inode.children, ++n->n_vals, i + 1U, rhs); + + return rhs; +} + +#ifdef ZIX_BTREE_SORTED_CHECK +/** Check that `n` is sorted with respect to search key `e`. */ +static bool +zix_btree_node_is_sorted_with_respect_to(const ZixBTree* const t, + const ZixBTreeNode* const n, + const void* const e) +{ + if (n->n_vals <= 1) { + return true; + } + + int cmp = t->cmp(zix_btree_value(n, 0), e, t->cmp_data); + for (uint16_t i = 1; i < n->n_vals; ++i) { + const int next_cmp = t->cmp(zix_btree_value(n, i), e, t->cmp_data); + if ((cmp >= 0 && next_cmp < 0) || (cmp > 0 && next_cmp <= 0)) { + return false; + } + cmp = next_cmp; + } + + return true; +} +#endif + +/** Find the first value in `n` that is not less than `e` (lower bound). */ +static unsigned +zix_btree_node_find(const ZixBTree* const t, + const ZixBTreeNode* const n, + const void* const e, + bool* const equal) +{ +#ifdef ZIX_BTREE_SORTED_CHECK + assert(zix_btree_node_is_sorted_with_respect_to(t, n, e)); +#endif + + unsigned first = 0U; + unsigned len = n->n_vals; + while (len > 0) { + const unsigned half = len >> 1U; + const unsigned i = first + half; + const int cmp = t->cmp(zix_btree_value(n, i), e, t->cmp_data); + if (cmp == 0) { + *equal = true; + len = half; // Keep searching for wildcard matches + } else if (cmp < 0) { + const unsigned chop = half + 1U; + first += chop; + len -= chop; + } else { + len = half; + } + } + + assert(!*equal || t->cmp(zix_btree_value(n, first), e, t->cmp_data) == 0); + return first; +} + +ZixStatus +zix_btree_insert(ZixBTree* const t, void* const e) +{ + ZixBTreeNode* parent = NULL; // Parent of n + ZixBTreeNode* n = t->root; // Current node + unsigned i = 0; // Index of n in parent + while (n) { + if (n->n_vals == zix_btree_max_vals(n)) { + // Node is full, split to ensure there is space for a leaf split + if (!parent) { + // Root is full, grow tree upwards + if (!(parent = zix_btree_node_new(false))) { + return ZIX_STATUS_NO_MEM; + } + t->root = parent; + parent->data.inode.children[0] = n; + ++t->height; + } + + ZixBTreeNode* const rhs = zix_btree_split_child(parent, i, n); + if (!rhs) { + return ZIX_STATUS_NO_MEM; + } + + const int cmp = t->cmp(parent->data.inode.vals[i], e, t->cmp_data); + if (cmp == 0) { + return ZIX_STATUS_EXISTS; + } + + if (cmp < 0) { + // Move to new RHS + n = rhs; + ++i; + } + } + + assert(!parent || zix_btree_child(parent, i) == n); + + bool equal = false; + i = zix_btree_node_find(t, n, e, &equal); + if (equal) { + return ZIX_STATUS_EXISTS; + } + + if (!n->is_leaf) { + // Descend to child node left of value + parent = n; + n = zix_btree_child(n, i); + } else { + // Insert into internal node + zix_btree_ainsert(n->data.leaf.vals, n->n_vals++, i, e); + break; + } + } + + ++t->size; + + return ZIX_STATUS_SUCCESS; +} + +static ZixBTreeIter* +zix_btree_iter_new(const ZixBTree* const t) +{ + const size_t s = t->height * sizeof(ZixBTreeIterFrame); + + ZixBTreeIter* i = (ZixBTreeIter*)calloc(1, sizeof(ZixBTreeIter) + s); + if (i) { + i->n_levels = t->height; + } + return i; +} + +static void +zix_btree_iter_set_frame(ZixBTreeIter* const ti, + ZixBTreeNode* const n, + const unsigned i) +{ + if (ti) { + ti->stack[ti->level].node = n; + ti->stack[ti->level].index = i; + } +} + +static bool +zix_btree_node_is_minimal(ZixBTreeNode* const n) +{ + assert(n->n_vals >= zix_btree_min_vals(n)); + return n->n_vals == zix_btree_min_vals(n); +} + +/** Enlarge left child by stealing a value from its right sibling. */ +static ZixBTreeNode* +zix_btree_rotate_left(ZixBTreeNode* const parent, const unsigned i) +{ + ZixBTreeNode* const lhs = zix_btree_child(parent, i); + ZixBTreeNode* const rhs = zix_btree_child(parent, i + 1); + + assert(lhs->is_leaf == rhs->is_leaf); + + if (lhs->is_leaf) { + // Move parent value to end of LHS + lhs->data.leaf.vals[lhs->n_vals++] = parent->data.inode.vals[i]; + + // Move first value in RHS to parent + parent->data.inode.vals[i] = + zix_btree_aerase(rhs->data.leaf.vals, rhs->n_vals, 0); + } else { + // Move parent value to end of LHS + lhs->data.inode.vals[lhs->n_vals++] = parent->data.inode.vals[i]; + + // Move first value in RHS to parent + parent->data.inode.vals[i] = + zix_btree_aerase(rhs->data.inode.vals, rhs->n_vals, 0); + + // Move first child pointer from RHS to end of LHS + lhs->data.inode.children[lhs->n_vals] = (ZixBTreeNode*)zix_btree_aerase( + (void**)rhs->data.inode.children, rhs->n_vals, 0); + } + + --rhs->n_vals; + + return lhs; +} + +/** Enlarge right child by stealing a value from its left sibling. */ +static ZixBTreeNode* +zix_btree_rotate_right(ZixBTreeNode* const parent, const unsigned i) +{ + ZixBTreeNode* const lhs = zix_btree_child(parent, i - 1); + ZixBTreeNode* const rhs = zix_btree_child(parent, i); + + assert(lhs->is_leaf == rhs->is_leaf); + + if (lhs->is_leaf) { + // Prepend parent value to RHS + zix_btree_ainsert( + rhs->data.leaf.vals, rhs->n_vals++, 0, parent->data.inode.vals[i - 1]); + + // Move last value from LHS to parent + parent->data.inode.vals[i - 1] = lhs->data.leaf.vals[--lhs->n_vals]; + } else { + // Prepend parent value to RHS + zix_btree_ainsert( + rhs->data.inode.vals, rhs->n_vals++, 0, parent->data.inode.vals[i - 1]); + + // Move last child pointer from LHS and prepend to RHS + zix_btree_ainsert((void**)rhs->data.inode.children, + rhs->n_vals, + 0, + lhs->data.inode.children[lhs->n_vals]); + + // Move last value from LHS to parent + parent->data.inode.vals[i - 1] = lhs->data.inode.vals[--lhs->n_vals]; + } + + return rhs; +} + +/** Move n[i] down, merge the left and right child, return the merged node. */ +static ZixBTreeNode* +zix_btree_merge(ZixBTree* const t, ZixBTreeNode* const n, const unsigned i) +{ + ZixBTreeNode* const lhs = zix_btree_child(n, i); + ZixBTreeNode* const rhs = zix_btree_child(n, i + 1); + + assert(lhs->is_leaf == rhs->is_leaf); + assert(zix_btree_node_is_minimal(lhs)); + assert(lhs->n_vals + rhs->n_vals < zix_btree_max_vals(lhs)); + + // Move parent value to end of LHS + if (lhs->is_leaf) { + lhs->data.leaf.vals[lhs->n_vals++] = + zix_btree_aerase(n->data.inode.vals, n->n_vals, i); + } else { + lhs->data.inode.vals[lhs->n_vals++] = + zix_btree_aerase(n->data.inode.vals, n->n_vals, i); + } + + // Erase corresponding child pointer (to RHS) in parent + zix_btree_aerase((void**)n->data.inode.children, n->n_vals, i + 1U); + + // Add everything from RHS to end of LHS + if (lhs->is_leaf) { + memcpy(lhs->data.leaf.vals + lhs->n_vals, + rhs->data.leaf.vals, + rhs->n_vals * sizeof(void*)); + } else { + memcpy(lhs->data.inode.vals + lhs->n_vals, + rhs->data.inode.vals, + rhs->n_vals * sizeof(void*)); + memcpy(lhs->data.inode.children + lhs->n_vals, + rhs->data.inode.children, + (rhs->n_vals + 1U) * sizeof(void*)); + } + + lhs->n_vals = (uint16_t)(lhs->n_vals + rhs->n_vals); + + if (--n->n_vals == 0) { + // Root is now empty, replace it with its only child + assert(n == t->root); + t->root = lhs; + free(n); + } + + free(rhs); + return lhs; +} + +/** Remove and return the min value from the subtree rooted at `n`. */ +static void* +zix_btree_remove_min(ZixBTree* const t, ZixBTreeNode* n) +{ + while (!n->is_leaf) { + if (zix_btree_node_is_minimal(zix_btree_child(n, 0))) { + // Leftmost child is minimal, must expand + if (!zix_btree_node_is_minimal(zix_btree_child(n, 1))) { + // Child's right sibling has at least one key to steal + n = zix_btree_rotate_left(n, 0); + } else { + // Both child and right sibling are minimal, merge + n = zix_btree_merge(t, n, 0); + } + } else { + n = zix_btree_child(n, 0); + } + } + + return zix_btree_aerase(n->data.leaf.vals, --n->n_vals, 0); +} + +/** Remove and return the max value from the subtree rooted at `n`. */ +static void* +zix_btree_remove_max(ZixBTree* const t, ZixBTreeNode* n) +{ + while (!n->is_leaf) { + if (zix_btree_node_is_minimal(zix_btree_child(n, n->n_vals))) { + // Leftmost child is minimal, must expand + if (!zix_btree_node_is_minimal(zix_btree_child(n, n->n_vals - 1u))) { + // Child's left sibling has at least one key to steal + n = zix_btree_rotate_right(n, n->n_vals); + } else { + // Both child and left sibling are minimal, merge + n = zix_btree_merge(t, n, n->n_vals - 1U); + } + } else { + n = zix_btree_child(n, n->n_vals); + } + } + + return n->data.leaf.vals[--n->n_vals]; +} + +ZixStatus +zix_btree_remove(ZixBTree* const t, + const void* const e, + void** const out, + ZixBTreeIter** const next) +{ + ZixBTreeNode* n = t->root; + ZixBTreeIter* ti = NULL; + const bool user_iter = next && *next; + if (next) { + if (!*next && !(*next = zix_btree_iter_new(t))) { + return ZIX_STATUS_NO_MEM; + } + ti = *next; + ti->level = 0; + } + + while (true) { + /* To remove in a single walk down, the tree is adjusted along the way + so that the current node always has at least one more value than the + minimum required in general. Thus, there is always room to remove + without adjusting on the way back up. */ + assert(n == t->root || !zix_btree_node_is_minimal(n)); + + bool equal = false; + const unsigned i = zix_btree_node_find(t, n, e, &equal); + zix_btree_iter_set_frame(ti, n, i); + if (n->is_leaf) { + if (equal) { + // Found in leaf node + *out = zix_btree_aerase(n->data.leaf.vals, --n->n_vals, i); + if (ti && i == n->n_vals) { + if (i == 0) { + ti->level = 0; + ti->stack[0].node = NULL; + } else { + --ti->stack[ti->level].index; + zix_btree_iter_increment(ti); + } + } + --t->size; + return ZIX_STATUS_SUCCESS; + } + + // Not found in leaf node, or tree + if (ti && !user_iter) { + zix_btree_iter_free(ti); + *next = NULL; + } + + return ZIX_STATUS_NOT_FOUND; + } + + if (equal) { + // Found in internal node + ZixBTreeNode* const lhs = zix_btree_child(n, i); + ZixBTreeNode* const rhs = zix_btree_child(n, i + 1); + const size_t l_size = lhs->n_vals; + const size_t r_size = rhs->n_vals; + if (zix_btree_node_is_minimal(lhs) && zix_btree_node_is_minimal(rhs)) { + // Both preceding and succeeding child are minimal + n = zix_btree_merge(t, n, i); + } else if (l_size >= r_size) { + // Left child can remove without merge + assert(!zix_btree_node_is_minimal(lhs)); + *out = n->data.inode.vals[i]; + n->data.inode.vals[i] = zix_btree_remove_max(t, lhs); + --t->size; + return ZIX_STATUS_SUCCESS; + } else { + // Right child can remove without merge + assert(!zix_btree_node_is_minimal(rhs)); + *out = n->data.inode.vals[i]; + n->data.inode.vals[i] = zix_btree_remove_min(t, rhs); + --t->size; + return ZIX_STATUS_SUCCESS; + } + } else { + // Not found in internal node, key is in/under children[i] + if (zix_btree_node_is_minimal(zix_btree_child(n, i))) { + if (i > 0 && !zix_btree_node_is_minimal(zix_btree_child(n, i - 1))) { + // Steal a key from child's left sibling + n = zix_btree_rotate_right(n, i); + } else if (i < n->n_vals && + !zix_btree_node_is_minimal(zix_btree_child(n, i + 1))) { + // Steal a key from child's right sibling + n = zix_btree_rotate_left(n, i); + } else if (n == t->root && n->n_vals == 1) { + // Root has two children, both minimal, delete it + assert(i == 0 || i == 1); + const uint16_t counts[2] = {zix_btree_child(n, 0)->n_vals, + zix_btree_child(n, 1)->n_vals}; + + n = zix_btree_merge(t, n, 0); + if (ti) { + ti->stack[ti->level].node = n; + ti->stack[ti->level].index = counts[i]; + } + } else { + // Both child's siblings are minimal, merge them + if (i < n->n_vals) { + n = zix_btree_merge(t, n, i); + } else { + n = zix_btree_merge(t, n, i - 1U); + if (ti) { + --ti->stack[ti->level].index; + } + } + } + } else { + n = zix_btree_child(n, i); + } + } + if (ti) { + ++ti->level; + } + } + + assert(false); // Not reached + return ZIX_STATUS_ERROR; +} + +ZixStatus +zix_btree_find(const ZixBTree* const t, + const void* const e, + ZixBTreeIter** const ti) +{ + ZixBTreeNode* n = t->root; + if (!(*ti = zix_btree_iter_new(t))) { + return ZIX_STATUS_NO_MEM; + } + + while (n) { + bool equal = false; + const unsigned i = zix_btree_node_find(t, n, e, &equal); + + zix_btree_iter_set_frame(*ti, n, i); + + if (equal) { + return ZIX_STATUS_SUCCESS; + } + + if (n->is_leaf) { + break; + } + + ++(*ti)->level; + n = zix_btree_child(n, i); + } + + zix_btree_iter_free(*ti); + *ti = NULL; + return ZIX_STATUS_NOT_FOUND; +} + +ZixStatus +zix_btree_lower_bound(const ZixBTree* const t, + const void* const e, + ZixBTreeIter** const ti) +{ + if (!t) { + *ti = NULL; + return ZIX_STATUS_BAD_ARG; + } + + if (!t->root) { + *ti = NULL; + return ZIX_STATUS_SUCCESS; + } + + ZixBTreeNode* n = t->root; + bool found = false; + unsigned found_level = 0; + if (!(*ti = zix_btree_iter_new(t))) { + return ZIX_STATUS_NO_MEM; + } + + while (n) { + bool equal = false; + const unsigned i = zix_btree_node_find(t, n, e, &equal); + + zix_btree_iter_set_frame(*ti, n, i); + + if (equal) { + found_level = (*ti)->level; + found = true; + } + + if (n->is_leaf) { + break; + } + + ++(*ti)->level; + n = zix_btree_child(n, i); + assert(n); + } + + const ZixBTreeIterFrame* const frame = &(*ti)->stack[(*ti)->level]; + assert(frame->node); + if (frame->index == frame->node->n_vals) { + if (found) { + // Found on a previous level but went too far + (*ti)->level = found_level; + } else { + // Reached end (key is greater than everything in tree) + (*ti)->level = 0; + (*ti)->stack[0].node = NULL; + } + } + + return ZIX_STATUS_SUCCESS; +} + +void* +zix_btree_get(const ZixBTreeIter* const ti) +{ + const ZixBTreeIterFrame* const frame = &ti->stack[ti->level]; + assert(frame->node); + assert(frame->index < frame->node->n_vals); + return zix_btree_value(frame->node, frame->index); +} + +ZixBTreeIter* +zix_btree_begin(const ZixBTree* const t) +{ + ZixBTreeIter* const i = zix_btree_iter_new(t); + if (!i) { + return NULL; + } + + if (t->size == 0) { + i->level = 0; + i->stack[0].node = NULL; + } else { + ZixBTreeNode* n = t->root; + i->stack[0].node = n; + i->stack[0].index = 0; + while (!n->is_leaf) { + n = zix_btree_child(n, 0); + ++i->level; + i->stack[i->level].node = n; + i->stack[i->level].index = 0; + } + } + + return i; +} + +ZixBTreeIter* +zix_btree_end(const ZixBTree* const t) +{ + return zix_btree_iter_new(t); +} + +ZixBTreeIter* +zix_btree_iter_copy(const ZixBTreeIter* const i) +{ + if (!i) { + return NULL; + } + + const size_t s = i->n_levels * sizeof(ZixBTreeIterFrame); + ZixBTreeIter* j = (ZixBTreeIter*)calloc(1, sizeof(ZixBTreeIter) + s); + if (j) { + memcpy(j, i, sizeof(ZixBTreeIter) + s); + } + + return j; +} + +bool +zix_btree_iter_is_end(const ZixBTreeIter* const i) +{ + return !i || (i->level == 0 && i->stack[0].node == NULL); +} + +bool +zix_btree_iter_equals(const ZixBTreeIter* const lhs, + const ZixBTreeIter* const rhs) +{ + if (zix_btree_iter_is_end(lhs) && zix_btree_iter_is_end(rhs)) { + return true; + } + + if (zix_btree_iter_is_end(lhs) || zix_btree_iter_is_end(rhs) || + lhs->level != rhs->level) { + return false; + } + + return !memcmp(lhs, + rhs, + sizeof(ZixBTreeIter) + + (lhs->level + 1) * sizeof(ZixBTreeIterFrame)); +} + +void +zix_btree_iter_increment(ZixBTreeIter* const i) +{ + ZixBTreeIterFrame* f = &i->stack[i->level]; + if (f->node->is_leaf) { + // Leaf, move right + assert(f->index < f->node->n_vals); + if (++f->index == f->node->n_vals) { + // Reached end of leaf, move up + f = &i->stack[i->level]; + while (i->level > 0 && f->index == f->node->n_vals) { + f = &i->stack[--i->level]; + assert(f->index <= f->node->n_vals); + } + + if (f->index == f->node->n_vals) { + // Reached end of tree + assert(i->level == 0); + f->node = NULL; + f->index = 0; + } + } + } else { + // Internal node, move down to next child + assert(f->index < f->node->n_vals); + ZixBTreeNode* child = zix_btree_child(f->node, ++f->index); + + f = &i->stack[++i->level]; + f->node = child; + f->index = 0; + + // Move down and left until we hit a leaf + while (!f->node->is_leaf) { + child = zix_btree_child(f->node, 0); + f = &i->stack[++i->level]; + f->node = child; + f->index = 0; + } + } +} + +void +zix_btree_iter_free(ZixBTreeIter* const i) +{ + free(i); +} diff --git a/src/zix/btree.h b/src/zix/btree.h new file mode 100644 index 00000000..bec06988 --- /dev/null +++ b/src/zix/btree.h @@ -0,0 +1,187 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_BTREE_H +#define ZIX_BTREE_H + +#include "zix/common.h" + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @addtogroup zix + @{ + @name BTree + @{ +*/ + +/** + A B-Tree. +*/ +typedef struct ZixBTreeImpl ZixBTree; + +/** + A B-Tree node (opaque). +*/ +typedef struct ZixBTreeNodeImpl ZixBTreeNode; + +/** + An iterator over a B-Tree. + + Note that modifying the trees invalidates all iterators, so all iterators + are const iterators. +*/ +typedef struct ZixBTreeIterImpl ZixBTreeIter; + +/** + Create a new (empty) B-Tree. +*/ +ZIX_API +ZixBTree* +zix_btree_new(ZixComparator cmp, const void* cmp_data, ZixDestroyFunc destroy); + +/** + Free `t`. +*/ +ZIX_API +void +zix_btree_free(ZixBTree* t); + +/** + Return the number of elements in `t`. +*/ +ZIX_PURE_API +size_t +zix_btree_size(const ZixBTree* t); + +/** + Insert the element `e` into `t`. +*/ +ZIX_API +ZixStatus +zix_btree_insert(ZixBTree* t, void* e); + +/** + Remove the value `e` from `t`. + + @param t Tree to remove from. + + @param e Value to remove. + + @param out Set to point to the removed pointer (which may not equal `e`). + + @param next If non-NULL, pointed to the value following `e`. If *next is + also non-NULL, the iterator is reused, otherwise a new one is allocated. To + reuse an iterator, no items may have been added since its creation. +*/ +ZIX_API +ZixStatus +zix_btree_remove(ZixBTree* t, const void* e, void** out, ZixBTreeIter** next); + +/** + Set `ti` to an element equal to `e` in `t`. + If no such item exists, `ti` is set to NULL. +*/ +ZIX_API +ZixStatus +zix_btree_find(const ZixBTree* t, const void* e, ZixBTreeIter** ti); + +/** + Set `ti` to the smallest element in `t` that is not less than `e`. + + Wildcards are supported, so if the search key `e` compares equal to many + values in the tree, `ti` will be set to the least such element. The search + key `e` is always passed as the second argument to the comparator. +*/ +ZIX_API +ZixStatus +zix_btree_lower_bound(const ZixBTree* t, const void* e, ZixBTreeIter** ti); + +/** + Return the data associated with the given tree item. +*/ +ZIX_PURE_API +void* +zix_btree_get(const ZixBTreeIter* ti); + +/** + Return an iterator to the first (smallest) element in `t`. + + The returned iterator must be freed with zix_btree_iter_free(). +*/ +ZIX_PURE_API +ZixBTreeIter* +zix_btree_begin(const ZixBTree* t); + +/** + Return an iterator to the end of `t` (one past the last element). + + The returned iterator must be freed with zix_btree_iter_free(). +*/ +ZIX_API +ZixBTreeIter* +zix_btree_end(const ZixBTree* t); + +/** + Return a new copy of `i`. +*/ +ZIX_API +ZixBTreeIter* +zix_btree_iter_copy(const ZixBTreeIter* i); + +/** + Return true iff `lhs` is equal to `rhs`. +*/ +ZIX_PURE_API +bool +zix_btree_iter_equals(const ZixBTreeIter* lhs, const ZixBTreeIter* rhs); + +/** + Return true iff `i` is an iterator to the end of its tree. +*/ +ZIX_PURE_API +bool +zix_btree_iter_is_end(const ZixBTreeIter* i); + +/** + Increment `i` to point to the next element in the tree. +*/ +ZIX_API +void +zix_btree_iter_increment(ZixBTreeIter* i); + +/** + Free `i`. +*/ +ZIX_API +void +zix_btree_iter_free(ZixBTreeIter* i); + +/** + @} + @} +*/ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_BTREE_H */ diff --git a/src/zix/common.h b/src/zix/common.h new file mode 100644 index 00000000..d47586c6 --- /dev/null +++ b/src/zix/common.h @@ -0,0 +1,138 @@ +/* + Copyright 2016-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_COMMON_H +#define ZIX_COMMON_H + +#include + +/** + @addtogroup zix + @{ +*/ + +/** @cond */ +#if defined(_WIN32) && !defined(ZIX_STATIC) && defined(ZIX_INTERNAL) +# define ZIX_API __declspec(dllexport) +#elif defined(_WIN32) && !defined(ZIX_STATIC) +# define ZIX_API __declspec(dllimport) +#elif defined(__GNUC__) +# define ZIX_API __attribute__((visibility("default"))) +#else +# define ZIX_API +#endif + +#ifdef __GNUC__ +# define ZIX_PURE_FUNC __attribute__((pure)) +# define ZIX_CONST_FUNC __attribute__((const)) +# define ZIX_MALLOC_FUNC __attribute__((malloc)) +#else +# define ZIX_PURE_FUNC +# define ZIX_CONST_FUNC +# define ZIX_MALLOC_FUNC +#endif + +#define ZIX_PURE_API \ + ZIX_API \ + ZIX_PURE_FUNC + +#define ZIX_CONST_API \ + ZIX_API \ + ZIX_CONST_FUNC + +#define ZIX_MALLOC_API \ + ZIX_API \ + ZIX_MALLOC_FUNC + +/** @endcond */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __GNUC__ +# define ZIX_LOG_FUNC(fmt, arg1) __attribute__((format(printf, fmt, arg1))) +#else +# define ZIX_LOG_FUNC(fmt, arg1) +#endif + +// Unused parameter macro to suppresses warnings and make it impossible to use +#if defined(__cplusplus) +# define ZIX_UNUSED(name) +#elif defined(__GNUC__) +# define ZIX_UNUSED(name) name##_unused __attribute__((__unused__)) +#else +# define ZIX_UNUSED(name) name +#endif + +typedef enum { + ZIX_STATUS_SUCCESS, + ZIX_STATUS_ERROR, + ZIX_STATUS_NO_MEM, + ZIX_STATUS_NOT_FOUND, + ZIX_STATUS_EXISTS, + ZIX_STATUS_BAD_ARG, + ZIX_STATUS_BAD_PERMS +} ZixStatus; + +static inline const char* +zix_strerror(const ZixStatus status) +{ + switch (status) { + case ZIX_STATUS_SUCCESS: + return "Success"; + case ZIX_STATUS_ERROR: + return "Unknown error"; + case ZIX_STATUS_NO_MEM: + return "Out of memory"; + case ZIX_STATUS_NOT_FOUND: + return "Not found"; + case ZIX_STATUS_EXISTS: + return "Exists"; + case ZIX_STATUS_BAD_ARG: + return "Bad argument"; + case ZIX_STATUS_BAD_PERMS: + return "Bad permissions"; + } + return "Unknown error"; +} + +/** + Function for comparing two elements. +*/ +typedef int (*ZixComparator)(const void* a, + const void* b, + const void* user_data); + +/** + Function for testing equality of two elements. +*/ +typedef bool (*ZixEqualFunc)(const void* a, const void* b); + +/** + Function to destroy an element. +*/ +typedef void (*ZixDestroyFunc)(void* ptr); + +/** + @} +*/ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_COMMON_H */ diff --git a/src/zix/digest.c b/src/zix/digest.c new file mode 100644 index 00000000..47d27b94 --- /dev/null +++ b/src/zix/digest.c @@ -0,0 +1,141 @@ +/* + Copyright 2012-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "zix/digest.h" + +#ifdef __SSE4_2__ +# include +#endif + +#include +#include + +#ifdef __SSE4_2__ + +// SSE 4.2 CRC32 + +uint32_t +zix_digest_start(void) +{ + return 1; +} + +uint32_t +zix_digest_add(uint32_t hash, const void* const buf, const size_t len) +{ + const uint8_t* str = (const uint8_t*)buf; + +# ifdef __x86_64__ + for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) { + hash = (uint32_t)_mm_crc32_u64(hash, *(const uint64_t*)str); + str += sizeof(uint64_t); + } + if (len & sizeof(uint32_t)) { + hash = _mm_crc32_u32(hash, *(const uint32_t*)str); + str += sizeof(uint32_t); + } +# else + for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) { + hash = _mm_crc32_u32(hash, *(const uint32_t*)str); + str += sizeof(uint32_t); + } +# endif + if (len & sizeof(uint16_t)) { + hash = _mm_crc32_u16(hash, *(const uint16_t*)str); + str += sizeof(uint16_t); + } + if (len & sizeof(uint8_t)) { + hash = _mm_crc32_u8(hash, *(const uint8_t*)str); + } + + return hash; +} + +uint32_t +zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len) +{ + assert((uintptr_t)buf % sizeof(uint64_t) == 0); + assert(len % sizeof(uint64_t) == 0); + +# ifdef __x86_64__ + const uint64_t* ptr = (const uint64_t*)buf; + + for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) { + hash = (uint32_t)_mm_crc32_u64(hash, *ptr); + ++ptr; + } + + return hash; +# else + const uint32_t* ptr = (const uint32_t*)buf; + + for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) { + hash = _mm_crc32_u32(hash, *ptr); + ++ptr; + } + + return hash; +# endif +} + +uint32_t +zix_digest_add_ptr(const uint32_t hash, const void* const ptr) +{ +# ifdef __x86_64__ + return (uint32_t)_mm_crc32_u64(hash, (uintptr_t)ptr); +# else + return _mm_crc32_u32(hash, (uintptr_t)ptr); +# endif +} + +#else + +// Classic DJB hash + +uint32_t +zix_digest_start(void) +{ + return 5381; +} + +uint32_t +zix_digest_add(uint32_t hash, const void* const buf, const size_t len) +{ + const uint8_t* str = (const uint8_t*)buf; + + for (size_t i = 0; i < len; ++i) { + hash = (hash << 5u) + hash + str[i]; + } + + return hash; +} + +uint32_t +zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len) +{ + assert((uintptr_t)buf % sizeof(uint64_t) == 0); + assert(len % sizeof(uint64_t) == 0); + + return zix_digest_add(hash, buf, len); +} + +uint32_t +zix_digest_add_ptr(const uint32_t hash, const void* const ptr) +{ + return zix_digest_add(hash, &ptr, sizeof(ptr)); +} + +#endif diff --git a/src/zix/digest.h b/src/zix/digest.h new file mode 100644 index 00000000..1fde77a9 --- /dev/null +++ b/src/zix/digest.h @@ -0,0 +1,67 @@ +/* + Copyright 2012-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_DIGEST_H +#define ZIX_DIGEST_H + +#include "zix/common.h" + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + Return an initial empty digest value. +*/ +ZIX_CONST_API +uint32_t +zix_digest_start(void); + +/** + Update `hash` to include `buf`, a buffer of `len` bytes. + + This can be used for any size or alignment. +*/ +ZIX_PURE_API +uint32_t +zix_digest_add(uint32_t hash, const void* buf, size_t len); + +/** + Update `hash` to include `buf`, a 64-bit aligned buffer of `len` bytes. + + Both `buf` and `len` must be evenly divisible by 8 (64 bits). +*/ +ZIX_PURE_API +uint32_t +zix_digest_add_64(uint32_t hash, const void* buf, size_t len); + +/** + Update `hash` to include `ptr`. + + This hashes the value of the pointer itself, and does not dereference `ptr`. +*/ +ZIX_CONST_API +uint32_t +zix_digest_add_ptr(uint32_t hash, const void* ptr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_DIGEST_H */ diff --git a/src/zix/hash.c b/src/zix/hash.c new file mode 100644 index 00000000..81830820 --- /dev/null +++ b/src/zix/hash.c @@ -0,0 +1,230 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "zix/hash.h" + +#include +#include +#include + +/** + Primes, each slightly less than twice its predecessor, and as far away + from powers of two as possible. +*/ +static const unsigned sizes[] = { + 53, 97, 193, 389, 769, 1543, 3079, + 6151, 12289, 24593, 49157, 98317, 196613, 393241, + 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653, + 100663319, 201326611, 402653189, 805306457, 1610612741, 0}; + +typedef struct ZixHashEntry { + struct ZixHashEntry* next; ///< Next entry in bucket + uint32_t hash; ///< Non-modulo hash value + // Value follows here (access with zix_hash_value) +} ZixHashEntry; + +struct ZixHashImpl { + ZixHashFunc hash_func; + ZixEqualFunc equal_func; + ZixHashEntry** buckets; + const unsigned* n_buckets; + size_t value_size; + unsigned count; +}; + +static inline void* +zix_hash_value(ZixHashEntry* entry) +{ + return entry + 1; +} + +ZixHash* +zix_hash_new(ZixHashFunc hash_func, ZixEqualFunc equal_func, size_t value_size) +{ + ZixHash* hash = (ZixHash*)malloc(sizeof(ZixHash)); + if (hash) { + hash->hash_func = hash_func; + hash->equal_func = equal_func; + hash->n_buckets = &sizes[0]; + hash->value_size = value_size; + hash->count = 0; + if (!(hash->buckets = + (ZixHashEntry**)calloc(*hash->n_buckets, sizeof(ZixHashEntry*)))) { + free(hash); + return NULL; + } + } + return hash; +} + +void +zix_hash_free(ZixHash* hash) +{ + if (!hash) { + return; + } + + for (unsigned b = 0; b < *hash->n_buckets; ++b) { + ZixHashEntry* bucket = hash->buckets[b]; + for (ZixHashEntry* e = bucket; e;) { + ZixHashEntry* next = e->next; + free(e); + e = next; + } + } + + free(hash->buckets); + free(hash); +} + +size_t +zix_hash_size(const ZixHash* hash) +{ + return hash->count; +} + +static inline void +insert_entry(ZixHashEntry** bucket, ZixHashEntry* entry) +{ + entry->next = *bucket; + *bucket = entry; +} + +static inline ZixStatus +rehash(ZixHash* hash, unsigned new_n_buckets) +{ + ZixHashEntry** new_buckets = + (ZixHashEntry**)calloc(new_n_buckets, sizeof(ZixHashEntry*)); + if (!new_buckets) { + return ZIX_STATUS_NO_MEM; + } + + const unsigned old_n_buckets = *hash->n_buckets; + for (unsigned b = 0; b < old_n_buckets; ++b) { + for (ZixHashEntry* e = hash->buckets[b]; e;) { + ZixHashEntry* const next = e->next; + const unsigned h = e->hash % new_n_buckets; + insert_entry(&new_buckets[h], e); + e = next; + } + } + + free(hash->buckets); + hash->buckets = new_buckets; + + return ZIX_STATUS_SUCCESS; +} + +static inline ZixHashEntry* +find_entry(const ZixHash* hash, + const void* key, + const unsigned h, + const unsigned h_nomod) +{ + for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) { + if (e->hash == h_nomod && hash->equal_func(zix_hash_value(e), key)) { + return e; + } + } + return NULL; +} + +void* +zix_hash_find(const ZixHash* hash, const void* value) +{ + const unsigned h_nomod = hash->hash_func(value); + const unsigned h = h_nomod % *hash->n_buckets; + ZixHashEntry* const entry = find_entry(hash, value, h, h_nomod); + return entry ? zix_hash_value(entry) : 0; +} + +ZixStatus +zix_hash_insert(ZixHash* hash, const void* value, void** inserted) +{ + unsigned h_nomod = hash->hash_func(value); + unsigned h = h_nomod % *hash->n_buckets; + + ZixHashEntry* elem = find_entry(hash, value, h, h_nomod); + if (elem) { + assert(elem->hash == h_nomod); + if (inserted) { + *inserted = zix_hash_value(elem); + } + return ZIX_STATUS_EXISTS; + } + + elem = (ZixHashEntry*)malloc(sizeof(ZixHashEntry) + hash->value_size); + if (!elem) { + return ZIX_STATUS_NO_MEM; + } + elem->next = NULL; + elem->hash = h_nomod; + memcpy(elem + 1, value, hash->value_size); + + const unsigned next_n_buckets = *(hash->n_buckets + 1); + if (next_n_buckets != 0 && (hash->count + 1) >= next_n_buckets) { + if (!rehash(hash, next_n_buckets)) { + h = h_nomod % *(++hash->n_buckets); + } + } + + insert_entry(&hash->buckets[h], elem); + ++hash->count; + if (inserted) { + *inserted = zix_hash_value(elem); + } + return ZIX_STATUS_SUCCESS; +} + +ZixStatus +zix_hash_remove(ZixHash* hash, const void* value) +{ + const unsigned h_nomod = hash->hash_func(value); + const unsigned h = h_nomod % *hash->n_buckets; + + ZixHashEntry** next_ptr = &hash->buckets[h]; + for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) { + if (h_nomod == e->hash && hash->equal_func(zix_hash_value(e), value)) { + *next_ptr = e->next; + free(e); + return ZIX_STATUS_SUCCESS; + } + next_ptr = &e->next; + } + + if (hash->n_buckets != sizes) { + const unsigned prev_n_buckets = *(hash->n_buckets - 1); + if (hash->count - 1 <= prev_n_buckets) { + if (!rehash(hash, prev_n_buckets)) { + --hash->n_buckets; + } + } + } + + --hash->count; + return ZIX_STATUS_NOT_FOUND; +} + +void +zix_hash_foreach(ZixHash* hash, ZixHashVisitFunc f, void* user_data) +{ + for (unsigned b = 0; b < *hash->n_buckets; ++b) { + ZixHashEntry* bucket = hash->buckets[b]; + for (ZixHashEntry* e = bucket; e; e = e->next) { + f(zix_hash_value(e), user_data); + } + } +} diff --git a/src/zix/hash.h b/src/zix/hash.h new file mode 100644 index 00000000..39050381 --- /dev/null +++ b/src/zix/hash.h @@ -0,0 +1,138 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_HASH_H +#define ZIX_HASH_H + +#include "zix/common.h" + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @addtogroup zix + @{ + @name Hash + @{ +*/ + +typedef struct ZixHashImpl ZixHash; + +/** + Function for computing the hash of an element. +*/ +typedef uint32_t (*ZixHashFunc)(const void* value); + +/** + Function to visit a hash element. +*/ +typedef void (*ZixHashVisitFunc)(void* value, void* user_data); + +/** + Create a new hash table. + + To minimize space overhead, unlike many hash tables this stores a single + value, not a key and a value. Any size of value can be stored, but all the + values in the hash table must be the same size, and the values must be safe + to copy with memcpy. To get key:value behaviour, simply insert a struct + with a key and value into the hash. + + @param hash_func The hashing function. + @param equal_func A function to test value equality. + @param value_size The size of the values to be stored. +*/ +ZIX_API +ZixHash* +zix_hash_new(ZixHashFunc hash_func, ZixEqualFunc equal_func, size_t value_size); + +/** + Free `hash`. +*/ +ZIX_API +void +zix_hash_free(ZixHash* hash); + +/** + Return the number of elements in `hash`. +*/ +ZIX_PURE_API +size_t +zix_hash_size(const ZixHash* hash); + +/** + Insert an item into `hash`. + + If no matching value is found, ZIX_STATUS_SUCCESS will be returned, and @p + inserted will be pointed to the copy of `value` made in the new hash node. + + If a matching value already exists, ZIX_STATUS_EXISTS will be returned, and + `inserted` will be pointed to the existing value. + + @param hash The hash table. + @param value The value to be inserted. + @param inserted The copy of `value` in the hash table. + @return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS, or ZIX_STATUS_NO_MEM. +*/ +ZIX_API +ZixStatus +zix_hash_insert(ZixHash* hash, const void* value, void** inserted); + +/** + Remove an item from `hash`. + + @param hash The hash table. + @param value The value to remove. + @return ZIX_STATUS_SUCCES or ZIX_STATUS_NOT_FOUND. +*/ +ZIX_API +ZixStatus +zix_hash_remove(ZixHash* hash, const void* value); + +/** + Search for an item in `hash`. + + @param hash The hash table. + @param value The value to search for. +*/ +ZIX_API +void* +zix_hash_find(const ZixHash* hash, const void* value); + +/** + Call `f` on each value in `hash`. + + @param hash The hash table. + @param f The function to call on each value. + @param user_data The user_data parameter passed to `f`. +*/ +ZIX_API +void +zix_hash_foreach(ZixHash* hash, ZixHashVisitFunc f, void* user_data); + +/** + @} + @} +*/ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_HASH_H */ -- cgit v1.2.1