From 02a9f3a340e441a3b12221d4a969362de5e8edd0 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Wed, 6 Jun 2018 20:35:46 +0200 Subject: Add zix data structures --- src/zix/btree.c | 975 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/zix/btree.h | 213 ++++++++++++ src/zix/common.h | 137 ++++++++ src/zix/digest.c | 141 ++++++++ src/zix/digest.h | 67 ++++ src/zix/hash.c | 230 +++++++++++++ src/zix/hash.h | 138 ++++++++ 7 files changed, 1901 insertions(+) create mode 100644 src/zix/btree.c create mode 100644 src/zix/btree.h create mode 100644 src/zix/common.h create mode 100644 src/zix/digest.c create mode 100644 src/zix/digest.h create mode 100644 src/zix/hash.c create mode 100644 src/zix/hash.h (limited to 'src/zix') diff --git a/src/zix/btree.c b/src/zix/btree.c new file mode 100644 index 00000000..56dfa98c --- /dev/null +++ b/src/zix/btree.c @@ -0,0 +1,975 @@ +/* + Copyright 2011-2021 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "zix/btree.h" + +#include +#include +#include +#include + +// #define ZIX_BTREE_SORTED_CHECK 1 + +// Define ZixShort as an integer type half the size of a pointer +#if UINTPTR_MAX >= UINT32_MAX +typedef uint32_t ZixShort; +#else +typedef uint16_t ZixShort; +#endif + +#ifndef ZIX_BTREE_PAGE_SIZE +# define ZIX_BTREE_PAGE_SIZE 4096u +#endif + +#define ZIX_BTREE_NODE_SPACE (ZIX_BTREE_PAGE_SIZE - 2u * sizeof(ZixShort)) +#define ZIX_BTREE_LEAF_VALS ((ZIX_BTREE_NODE_SPACE / sizeof(void*)) - 1u) +#define ZIX_BTREE_INODE_VALS (ZIX_BTREE_LEAF_VALS / 2u) + +struct ZixBTreeImpl { + ZixBTreeNode* root; + ZixComparator cmp; + const void* cmp_data; + size_t size; +}; + +struct ZixBTreeNodeImpl { + ZixShort is_leaf; + ZixShort n_vals; + + union { + struct { + void* vals[ZIX_BTREE_LEAF_VALS]; + } leaf; + + struct { + void* vals[ZIX_BTREE_INODE_VALS]; + ZixBTreeNode* children[ZIX_BTREE_INODE_VALS + 1u]; + } inode; + } data; +}; + +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112l) || \ + (defined(__cplusplus) && __cplusplus >= 201103L) +static_assert(sizeof(ZixBTreeNode) == ZIX_BTREE_PAGE_SIZE, ""); +#endif + +static ZixBTreeNode* +zix_btree_node_new(const bool leaf) +{ +#if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112l) || \ + (defined(__cplusplus) && __cplusplus >= 201103L)) + assert(sizeof(ZixBTreeNode) <= ZIX_BTREE_PAGE_SIZE); + assert(sizeof(ZixBTreeNode) >= + ZIX_BTREE_PAGE_SIZE - 2u * sizeof(ZixBTreeNode*)); +#endif + + ZixBTreeNode* const node = (ZixBTreeNode*)malloc(sizeof(ZixBTreeNode)); + if (node) { + node->is_leaf = leaf; + node->n_vals = 0u; + } + + return node; +} + +ZIX_PURE_FUNC +static ZixBTreeNode* +zix_btree_child(const ZixBTreeNode* const node, const unsigned i) +{ + assert(!node->is_leaf); + assert(i <= ZIX_BTREE_INODE_VALS); + return node->data.inode.children[i]; +} + +ZixBTree* +zix_btree_new(const ZixComparator cmp, const void* const cmp_data) +{ + ZixBTree* const t = (ZixBTree*)malloc(sizeof(ZixBTree)); + if (!t) { + return NULL; + } + + if (!(t->root = zix_btree_node_new(true))) { + free(t); + return NULL; + } + + t->cmp = cmp; + t->cmp_data = cmp_data; + t->size = 0; + + return t; +} + +static void +zix_btree_free_children(ZixBTree* const t, + ZixBTreeNode* const n, + ZixDestroyFunc destroy) +{ + if (!n->is_leaf) { + for (ZixShort i = 0; i < n->n_vals + 1u; ++i) { + zix_btree_free_children(t, zix_btree_child(n, i), destroy); + free(zix_btree_child(n, i)); + } + } + + if (destroy) { + if (n->is_leaf) { + for (ZixShort i = 0u; i < n->n_vals; ++i) { + destroy(n->data.leaf.vals[i]); + } + } else { + for (ZixShort i = 0u; i < n->n_vals; ++i) { + destroy(n->data.inode.vals[i]); + } + } + } +} + +void +zix_btree_free(ZixBTree* const t, ZixDestroyFunc destroy) +{ + if (t) { + zix_btree_clear(t, destroy); + free(t->root); + free(t); + } +} + +void +zix_btree_clear(ZixBTree* const t, ZixDestroyFunc destroy) +{ + zix_btree_free_children(t, t->root, destroy); + + memset(t->root, 0, sizeof(ZixBTreeNode)); + t->root->is_leaf = true; + t->size = 0u; +} + +size_t +zix_btree_size(const ZixBTree* const t) +{ + return t->size; +} + +static ZixShort +zix_btree_max_vals(const ZixBTreeNode* const node) +{ + return node->is_leaf ? ZIX_BTREE_LEAF_VALS : ZIX_BTREE_INODE_VALS; +} + +static ZixShort +zix_btree_min_vals(const ZixBTreeNode* const node) +{ + return (ZixShort)(((zix_btree_max_vals(node) + 1u) / 2u) - 1u); +} + +/// Shift pointers in `array` of length `n` right starting at `i` +static void +zix_btree_ainsert(void** const array, + const unsigned n, + const unsigned i, + void* const e) +{ + memmove(array + i + 1, array + i, (n - i) * sizeof(e)); + array[i] = e; +} + +/// Erase element `i` in `array` of length `n` and return erased element +static void* +zix_btree_aerase(void** const array, const unsigned n, const unsigned i) +{ + void* const ret = array[i]; + memmove(array + i, array + i + 1, (n - i) * sizeof(ret)); + return ret; +} + +/// Split lhs, the i'th child of `n`, into two nodes +static ZixBTreeNode* +zix_btree_split_child(ZixBTreeNode* const n, + const unsigned i, + ZixBTreeNode* const lhs) +{ + assert(lhs->n_vals == zix_btree_max_vals(lhs)); + assert(n->n_vals < ZIX_BTREE_INODE_VALS); + assert(i < n->n_vals + 1U); + assert(zix_btree_child(n, i) == lhs); + + const ZixShort max_n_vals = zix_btree_max_vals(lhs); + ZixBTreeNode* rhs = zix_btree_node_new(lhs->is_leaf); + if (!rhs) { + return NULL; + } + + // LHS and RHS get roughly half, less the middle value which moves up + lhs->n_vals = max_n_vals / 2U; + rhs->n_vals = (ZixShort)(max_n_vals - lhs->n_vals - 1); + + if (lhs->is_leaf) { + // Copy large half from LHS to new RHS node + memcpy(rhs->data.leaf.vals, + lhs->data.leaf.vals + lhs->n_vals + 1, + rhs->n_vals * sizeof(void*)); + + // Move middle value up to parent + zix_btree_ainsert( + n->data.inode.vals, n->n_vals, i, lhs->data.leaf.vals[lhs->n_vals]); + } else { + // Copy large half from LHS to new RHS node + memcpy(rhs->data.inode.vals, + lhs->data.inode.vals + lhs->n_vals + 1, + rhs->n_vals * sizeof(void*)); + memcpy(rhs->data.inode.children, + lhs->data.inode.children + lhs->n_vals + 1, + (rhs->n_vals + 1U) * sizeof(ZixBTreeNode*)); + + // Move middle value up to parent + zix_btree_ainsert( + n->data.inode.vals, n->n_vals, i, lhs->data.inode.vals[lhs->n_vals]); + } + + // Insert new RHS node in parent at position i + zix_btree_ainsert((void**)n->data.inode.children, ++n->n_vals, i + 1U, rhs); + + return rhs; +} + +#ifdef ZIX_BTREE_SORTED_CHECK +/// Check that `n` is sorted with respect to search key `e` +static bool +zix_btree_node_is_sorted_with_respect_to(const ZixComparator compare, + const void* const compare_user_data, + void* const* const values, + const unsigned n_values, + const void* const key) +{ + if (n_values <= 1u) { + return true; + } + + int cmp = compare(values[0], key, compare_user_data); + for (unsigned i = 1u; i < n_values; ++i) { + const int next_cmp = compare(values[i], key, compare_user_data); + if ((cmp >= 0 && next_cmp < 0) || (cmp > 0 && next_cmp <= 0)) { + return false; + } + + cmp = next_cmp; + } + + return true; +} +#endif + +static unsigned +zix_btree_find_value(const ZixComparator compare, + const void* const compare_user_data, + void* const* const values, + const unsigned n_values, + const void* const key, + bool* const equal) +{ + unsigned first = 0u; + unsigned count = n_values; + + while (count > 0u) { + const unsigned half = count >> 1u; + const unsigned i = first + half; + void* const value = values[i]; + const int cmp = compare(value, key, compare_user_data); + + if (!cmp) { + *equal = true; + return i; + } + + if (cmp < 0) { + first += half + 1u; + count -= half + 1u; + } else { + count = half; + } + } + + assert(first == n_values || compare(values[first], key, compare_user_data)); + *equal = false; + return first; +} + +static unsigned +zix_btree_find_pattern(const ZixComparator compare_key, + const void* const compare_key_user_data, + void* const* const values, + const unsigned n_values, + const void* const key, + bool* const equal) +{ +#ifdef ZIX_BTREE_SORTED_CHECK + assert(zix_btree_node_is_sorted_with_respect_to( + compare_key, compare_key_user_data, values, n_values, key)); +#endif + + unsigned first = 0u; + unsigned count = n_values; + + while (count > 0u) { + const unsigned half = count >> 1u; + const unsigned i = first + half; + void* const value = values[i]; + const int cmp = compare_key(value, key, compare_key_user_data); + + if (cmp == 0) { + // Found a match, but keep searching for the leftmost one + *equal = true; + count = half; + + } else if (cmp < 0) { + // Search right half + first += half + 1u; + count -= half + 1u; + + } else { + // Search left half + count = half; + } + } + + assert(!*equal || + (compare_key(values[first], key, compare_key_user_data) == 0 && + (first == 0u || + (compare_key(values[first - 1u], key, compare_key_user_data) < 0)))); + + return first; +} + +/// Convenience wrapper to find a value in an internal node +static unsigned +zix_btree_inode_find(const ZixBTree* const t, + const ZixBTreeNode* const n, + const void* const e, + bool* const equal) +{ + assert(!n->is_leaf); + + return zix_btree_find_value( + t->cmp, t->cmp_data, n->data.inode.vals, n->n_vals, e, equal); +} + +/// Convenience wrapper to find a value in a leaf node +static unsigned +zix_btree_leaf_find(const ZixBTree* const t, + const ZixBTreeNode* const n, + const void* const e, + bool* const equal) +{ + assert(n->is_leaf); + + return zix_btree_find_value( + t->cmp, t->cmp_data, n->data.leaf.vals, n->n_vals, e, equal); +} + +static inline bool +zix_btree_can_remove_from(const ZixBTreeNode* const n) +{ + assert(n->n_vals >= zix_btree_min_vals(n)); + return n->n_vals > zix_btree_min_vals(n); +} + +static inline bool +zix_btree_is_full(const ZixBTreeNode* const n) +{ + assert(n->n_vals <= zix_btree_max_vals(n)); + return n->n_vals == zix_btree_max_vals(n); +} + +static ZixStatus +zix_btree_grow_up(ZixBTree* const t) +{ + ZixBTreeNode* const new_root = zix_btree_node_new(false); + if (!new_root) { + return ZIX_STATUS_NO_MEM; + } + + // Set old root as the only child of the new root + new_root->data.inode.children[0] = t->root; + + // Split the old root to get two balanced siblings + zix_btree_split_child(new_root, 0, t->root); + t->root = new_root; + + return ZIX_STATUS_SUCCESS; +} + +ZixStatus +zix_btree_insert(ZixBTree* const t, void* const e) +{ + ZixStatus st = ZIX_STATUS_SUCCESS; + + // Grow up if necessary to ensure the root is not full + if (zix_btree_is_full(t->root)) { + if ((st = zix_btree_grow_up(t))) { + return st; + } + } + + // Walk down from the root until we reach a suitable leaf + ZixBTreeNode* node = t->root; + while (!node->is_leaf) { + // Search for the value in this node + bool equal = false; + const unsigned i = zix_btree_inode_find(t, node, e, &equal); + if (equal) { + return ZIX_STATUS_EXISTS; + } + + // Value not in this node, but may be in the ith child + ZixBTreeNode* child = node->data.inode.children[i]; + if (zix_btree_is_full(child)) { + // The child is full, split it before continuing + ZixBTreeNode* const rhs = zix_btree_split_child(node, i, child); + if (!rhs) { + return ZIX_STATUS_NO_MEM; + } + + // Compare with new split value to determine which side to use + const int cmp = t->cmp(node->data.inode.vals[i], e, t->cmp_data); + if (cmp < 0) { + child = rhs; // Split value is less than the new value, move right + } else if (cmp == 0) { + return ZIX_STATUS_EXISTS; // Split value is exactly the value to insert + } + } + + // Descend to child node and continue + node = child; + } + + // Search for the value in the leaf + bool equal = false; + const unsigned i = zix_btree_leaf_find(t, node, e, &equal); + if (equal) { + return ZIX_STATUS_EXISTS; + } + + // The value is not in the tree, insert into the leaf + zix_btree_ainsert(node->data.leaf.vals, node->n_vals++, i, e); + ++t->size; + return ZIX_STATUS_SUCCESS; +} + +static void +zix_btree_iter_set_frame(ZixBTreeIter* const ti, + ZixBTreeNode* const n, + const ZixShort i) +{ + ti->nodes[ti->level] = n; + ti->indexes[ti->level] = (uint16_t)i; +} + +static void +zix_btree_iter_push(ZixBTreeIter* const ti, + ZixBTreeNode* const n, + const ZixShort i) +{ + assert(ti->level < ZIX_BTREE_MAX_HEIGHT); + ++ti->level; + ti->nodes[ti->level] = n; + ti->indexes[ti->level] = (uint16_t)i; +} + +static void +zix_btree_iter_pop(ZixBTreeIter* const ti) +{ + assert(ti->level > 0u); + ti->nodes[ti->level] = NULL; + ti->indexes[ti->level] = 0u; + --ti->level; +} + +/// Enlarge left child by stealing a value from its right sibling +static ZixBTreeNode* +zix_btree_rotate_left(ZixBTreeNode* const parent, const unsigned i) +{ + ZixBTreeNode* const lhs = zix_btree_child(parent, i); + ZixBTreeNode* const rhs = zix_btree_child(parent, i + 1); + + assert(lhs->is_leaf == rhs->is_leaf); + + if (lhs->is_leaf) { + // Move parent value to end of LHS + lhs->data.leaf.vals[lhs->n_vals++] = parent->data.inode.vals[i]; + + // Move first value in RHS to parent + parent->data.inode.vals[i] = + zix_btree_aerase(rhs->data.leaf.vals, rhs->n_vals, 0); + } else { + // Move parent value to end of LHS + lhs->data.inode.vals[lhs->n_vals++] = parent->data.inode.vals[i]; + + // Move first value in RHS to parent + parent->data.inode.vals[i] = + zix_btree_aerase(rhs->data.inode.vals, rhs->n_vals, 0); + + // Move first child pointer from RHS to end of LHS + lhs->data.inode.children[lhs->n_vals] = (ZixBTreeNode*)zix_btree_aerase( + (void**)rhs->data.inode.children, rhs->n_vals, 0); + } + + --rhs->n_vals; + + return lhs; +} + +/// Enlarge a child by stealing a value from its left sibling +static ZixBTreeNode* +zix_btree_rotate_right(ZixBTreeNode* const parent, const unsigned i) +{ + ZixBTreeNode* const lhs = zix_btree_child(parent, i - 1); + ZixBTreeNode* const rhs = zix_btree_child(parent, i); + + assert(lhs->is_leaf == rhs->is_leaf); + + if (lhs->is_leaf) { + // Prepend parent value to RHS + zix_btree_ainsert( + rhs->data.leaf.vals, rhs->n_vals++, 0, parent->data.inode.vals[i - 1]); + + // Move last value from LHS to parent + parent->data.inode.vals[i - 1] = lhs->data.leaf.vals[--lhs->n_vals]; + } else { + // Prepend parent value to RHS + zix_btree_ainsert( + rhs->data.inode.vals, rhs->n_vals++, 0, parent->data.inode.vals[i - 1]); + + // Move last child pointer from LHS and prepend to RHS + zix_btree_ainsert((void**)rhs->data.inode.children, + rhs->n_vals, + 0, + lhs->data.inode.children[lhs->n_vals]); + + // Move last value from LHS to parent + parent->data.inode.vals[i - 1] = lhs->data.inode.vals[--lhs->n_vals]; + } + + return rhs; +} + +/// Move n[i] down, merge the left and right child, return the merged node +static ZixBTreeNode* +zix_btree_merge(ZixBTree* const t, ZixBTreeNode* const n, const unsigned i) +{ + ZixBTreeNode* const lhs = zix_btree_child(n, i); + ZixBTreeNode* const rhs = zix_btree_child(n, i + 1); + + assert(lhs->is_leaf == rhs->is_leaf); + assert(lhs->n_vals + rhs->n_vals < zix_btree_max_vals(lhs)); + + // Move parent value to end of LHS + if (lhs->is_leaf) { + lhs->data.leaf.vals[lhs->n_vals++] = + zix_btree_aerase(n->data.inode.vals, n->n_vals, i); + } else { + lhs->data.inode.vals[lhs->n_vals++] = + zix_btree_aerase(n->data.inode.vals, n->n_vals, i); + } + + // Erase corresponding child pointer (to RHS) in parent + zix_btree_aerase((void**)n->data.inode.children, n->n_vals, i + 1U); + + // Add everything from RHS to end of LHS + if (lhs->is_leaf) { + memcpy(lhs->data.leaf.vals + lhs->n_vals, + rhs->data.leaf.vals, + rhs->n_vals * sizeof(void*)); + } else { + memcpy(lhs->data.inode.vals + lhs->n_vals, + rhs->data.inode.vals, + rhs->n_vals * sizeof(void*)); + memcpy(lhs->data.inode.children + lhs->n_vals, + rhs->data.inode.children, + (rhs->n_vals + 1U) * sizeof(void*)); + } + + lhs->n_vals = (ZixShort)(lhs->n_vals + rhs->n_vals); + + if (--n->n_vals == 0) { + // Root is now empty, replace it with its only child + assert(n == t->root); + t->root = lhs; + free(n); + } + + free(rhs); + return lhs; +} + +/// Remove and return the min value from the subtree rooted at `n` +static void* +zix_btree_remove_min(ZixBTree* const t, ZixBTreeNode* n) +{ + assert(zix_btree_can_remove_from(n)); + + while (!n->is_leaf) { + ZixBTreeNode* const* const children = n->data.inode.children; + + n = zix_btree_can_remove_from(children[0]) ? children[0] + : zix_btree_can_remove_from(children[1]) ? zix_btree_rotate_left(n, 0) + : zix_btree_merge(t, n, 0); + } + + return zix_btree_aerase(n->data.leaf.vals, --n->n_vals, 0); +} + +/// Remove and return the max value from the subtree rooted at `n` +static void* +zix_btree_remove_max(ZixBTree* const t, ZixBTreeNode* n) +{ + assert(zix_btree_can_remove_from(n)); + + while (!n->is_leaf) { + ZixBTreeNode* const* const children = n->data.inode.children; + + const unsigned y = n->n_vals - 1u; + const unsigned z = n->n_vals; + + n = zix_btree_can_remove_from(children[z]) ? children[z] + : zix_btree_can_remove_from(children[y]) ? zix_btree_rotate_right(n, z) + : zix_btree_merge(t, n, y); + } + + return n->data.leaf.vals[--n->n_vals]; +} + +static ZixBTreeNode* +zix_btree_fatten_child(ZixBTree* const t, ZixBTreeIter* const iter) +{ + ZixBTreeNode* const n = iter->nodes[iter->level]; + const ZixShort i = iter->indexes[iter->level]; + + assert(!n->is_leaf); + ZixBTreeNode* const* const children = n->data.inode.children; + + if (i > 0 && zix_btree_can_remove_from(children[i - 1u])) { + return zix_btree_rotate_right(n, i); // Steal a key from left sibling + } + + if (i < n->n_vals && zix_btree_can_remove_from(children[i + 1u])) { + return zix_btree_rotate_left(n, i); // Steal a key from right sibling + } + + // Both child's siblings are minimal, merge them + + if (i == n->n_vals) { + --iter->indexes[iter->level]; + return zix_btree_merge(t, n, i - 1u); // Merge last two children + } + + return zix_btree_merge(t, n, i); // Merge left and right siblings +} + +/// Replace the ith value in `n` with one from a child if possible +static ZixStatus +zix_btree_replace_value(ZixBTree* const t, + ZixBTreeNode* const n, + const unsigned i, + void** const out) +{ + ZixBTreeNode* const lhs = zix_btree_child(n, i); + ZixBTreeNode* const rhs = zix_btree_child(n, i + 1); + if (!zix_btree_can_remove_from(lhs) && !zix_btree_can_remove_from(rhs)) { + return ZIX_STATUS_NOT_FOUND; + } + + // Stash the value for the caller before it is replaced + *out = n->data.inode.vals[i]; + + n->data.inode.vals[i] = + // Left child has more values, steal its largest + (lhs->n_vals > rhs->n_vals) ? zix_btree_remove_max(t, lhs) + + // Right child has more values, steal its smallest + : (rhs->n_vals > lhs->n_vals) ? zix_btree_remove_min(t, rhs) + + // Children are balanced, use index parity as a low-bias tie breaker + : (i & 1u) ? zix_btree_remove_max(t, lhs) + : zix_btree_remove_min(t, rhs); + + return ZIX_STATUS_SUCCESS; +} + +ZixStatus +zix_btree_remove(ZixBTree* const t, + const void* const e, + void** const out, + ZixBTreeIter* const next) +{ + ZixBTreeNode* n = t->root; + ZixBTreeIter* ti = next; + ZixStatus st = ZIX_STATUS_SUCCESS; + + *ti = zix_btree_end_iter; + + /* To remove in a single walk down, the tree is adjusted along the way so + that the current node always has at least one more value than the + minimum. This ensures that there is always room to remove, without + having to merge nodes again on a traversal back up. */ + + if (!n->is_leaf && n->n_vals == 1u && + !zix_btree_can_remove_from(n->data.inode.children[0u]) && + !zix_btree_can_remove_from(n->data.inode.children[1u])) { + // Root has only two children, both minimal, merge them into a new root + n = zix_btree_merge(t, n, 0); + } + + while (!n->is_leaf) { + assert(n == t->root || zix_btree_can_remove_from(n)); + + // Search for the value in the current node and update the iterator + bool equal = false; + const unsigned i = zix_btree_inode_find(t, n, e, &equal); + + zix_btree_iter_set_frame(ti, n, i); + + if (equal) { + // Found in internal node + if (!(st = zix_btree_replace_value(t, n, i, out))) { + // Replaced hole with a value from a direct child + --t->size; + return st; + } + + // Both preceding and succeeding child are minimal, merge and continue + n = zix_btree_merge(t, n, i); + + } else { + // Not found in internal node, is in the ith child if anywhere + n = zix_btree_can_remove_from(zix_btree_child(n, i)) + ? zix_btree_child(n, i) + : zix_btree_fatten_child(t, ti); + } + + ++ti->level; + } + + // We're at the leaf the value may be in, search for the value in it + bool equal = false; + const unsigned i = zix_btree_leaf_find(t, n, e, &equal); + + if (!equal) { // Not found in tree + *ti = zix_btree_end_iter; + return ZIX_STATUS_NOT_FOUND; + } + + // Erase from leaf node + *out = zix_btree_aerase(n->data.leaf.vals, --n->n_vals, i); + + // Update next iterator + if (n->n_vals == 0u) { + // Removed the last element in the tree + assert(n == t->root); + assert(t->size == 1u); + *ti = zix_btree_end_iter; + } else if (i == n->n_vals) { + // Removed the largest element in this leaf, increment to the next + zix_btree_iter_set_frame(ti, n, i - 1); + zix_btree_iter_increment(ti); + } else { + zix_btree_iter_set_frame(ti, n, i); + } + + --t->size; + return ZIX_STATUS_SUCCESS; +} + +ZixStatus +zix_btree_find(const ZixBTree* const t, + const void* const e, + ZixBTreeIter* const ti) +{ + ZixBTreeNode* n = t->root; + + *ti = zix_btree_end_iter; + + while (!n->is_leaf) { + bool equal = false; + const unsigned i = zix_btree_inode_find(t, n, e, &equal); + + zix_btree_iter_set_frame(ti, n, i); + + if (equal) { + return ZIX_STATUS_SUCCESS; + } + + ++ti->level; + n = zix_btree_child(n, i); + } + + bool equal = false; + const unsigned i = zix_btree_leaf_find(t, n, e, &equal); + if (equal) { + zix_btree_iter_set_frame(ti, n, i); + return ZIX_STATUS_SUCCESS; + } + + *ti = zix_btree_end_iter; + return ZIX_STATUS_NOT_FOUND; +} + +ZixStatus +zix_btree_lower_bound(const ZixBTree* const t, + const ZixComparator compare, + const void* const compare_user_data, + const void* const key, + ZixBTreeIter* const ti) +{ + *ti = zix_btree_end_iter; + + ZixBTreeNode* n = t->root; // Current node + uint16_t found_level = 0u; // Lowest level a match was found at + bool found = false; // True if a match was ever found + + // Search down until we reach a leaf + while (!n->is_leaf) { + bool equal = false; + const unsigned i = zix_btree_find_pattern( + compare, compare_user_data, n->data.inode.vals, n->n_vals, key, &equal); + + zix_btree_iter_set_frame(ti, n, i); + if (equal) { + found_level = ti->level; + found = true; + } + + ++ti->level; + n = zix_btree_child(n, i); + } + + bool equal = false; + const unsigned i = zix_btree_find_pattern( + compare, compare_user_data, n->data.leaf.vals, n->n_vals, key, &equal); + + zix_btree_iter_set_frame(ti, n, i); + if (equal) { + return ZIX_STATUS_SUCCESS; + } + + if (ti->indexes[ti->level] == ti->nodes[ti->level]->n_vals) { + if (found) { + // Found on a previous level but went too far + ti->level = found_level; + } else { + // Reached end (key is greater than everything in tree) + *ti = zix_btree_end_iter; + } + } + + return ZIX_STATUS_SUCCESS; +} + +void* +zix_btree_get(const ZixBTreeIter ti) +{ + const ZixBTreeNode* const node = ti.nodes[ti.level]; + const unsigned index = ti.indexes[ti.level]; + + assert(node); + assert(index < node->n_vals); + + return node->is_leaf ? node->data.leaf.vals[index] + : node->data.inode.vals[index]; +} + +ZixBTreeIter +zix_btree_begin(const ZixBTree* const t) +{ + ZixBTreeIter iter = zix_btree_end_iter; + + if (t->size > 0u) { + ZixBTreeNode* n = t->root; + zix_btree_iter_set_frame(&iter, n, 0u); + + while (!n->is_leaf) { + n = zix_btree_child(n, 0); + zix_btree_iter_push(&iter, n, 0u); + } + } + + return iter; +} + +ZixBTreeIter +zix_btree_end(const ZixBTree* const t) +{ + (void)t; + + return zix_btree_end_iter; +} + +bool +zix_btree_iter_equals(const ZixBTreeIter lhs, const ZixBTreeIter rhs) +{ + const size_t indexes_size = (lhs.level + 1u) * sizeof(uint16_t); + + return (lhs.level == rhs.level) && (lhs.nodes[0] == rhs.nodes[0]) && + (!lhs.nodes[0] || !memcmp(lhs.indexes, rhs.indexes, indexes_size)); +} + +ZixStatus +zix_btree_iter_increment(ZixBTreeIter* const i) +{ + assert(!zix_btree_iter_is_end(*i)); + + // Move to the next value in the current node + const uint16_t index = ++i->indexes[i->level]; + + if (i->nodes[i->level]->is_leaf) { + // Leaf, move up if necessary until we're not at the end of the node + while (i->indexes[i->level] >= i->nodes[i->level]->n_vals) { + if (i->level == 0) { + // End of root, end of tree + i->nodes[0] = NULL; + return ZIX_STATUS_REACHED_END; + } + + // At end of internal node, move up + zix_btree_iter_pop(i); + } + + } else { + // Internal node, move down to next child + const ZixBTreeNode* const node = i->nodes[i->level]; + ZixBTreeNode* const child = node->data.inode.children[index]; + + zix_btree_iter_push(i, child, 0u); + + // Move down and left until we hit a leaf + while (!i->nodes[i->level]->is_leaf) { + zix_btree_iter_push(i, i->nodes[i->level]->data.inode.children[0], 0u); + } + } + + return ZIX_STATUS_SUCCESS; +} + +ZixBTreeIter +zix_btree_iter_next(const ZixBTreeIter iter) +{ + ZixBTreeIter next = iter; + + zix_btree_iter_increment(&next); + + return next; +} diff --git a/src/zix/btree.h b/src/zix/btree.h new file mode 100644 index 00000000..3576789c --- /dev/null +++ b/src/zix/btree.h @@ -0,0 +1,213 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_BTREE_H +#define ZIX_BTREE_H + +#include "zix/common.h" + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @addtogroup zix + @{ + @name BTree + @{ +*/ + +/** + The maximum height of a ZixBTree. + + This is exposed because it determines the size of iterators, which are + statically sized so they can used on the stack. The usual degree (or + "fanout") of a B-Tree is high enough that a relatively short tree can + contain many elements. With the default page size of 4 KiB, the default + height of 6 is enough to store trillions. +*/ +#ifndef ZIX_BTREE_MAX_HEIGHT +# define ZIX_BTREE_MAX_HEIGHT 6u +#endif + +/// A B-Tree +typedef struct ZixBTreeImpl ZixBTree; + +/// A B-Tree node (opaque) +typedef struct ZixBTreeNodeImpl ZixBTreeNode; + +/** + An iterator over a B-Tree. + + Note that modifying the tree invalidates all iterators. + + The contents of this type are considered an implementation detail and should + not be used directly by clients. They are nevertheless exposed here so that + iterators can be allocated on the stack. +*/ +typedef struct { + ZixBTreeNode* nodes[ZIX_BTREE_MAX_HEIGHT]; ///< Parallel node pointer stack + uint16_t indexes[ZIX_BTREE_MAX_HEIGHT]; ///< Parallel child index stack + uint16_t level; ///< Current level in stack +} ZixBTreeIter; + +/// A static end iterator for convenience +static const ZixBTreeIter zix_btree_end_iter = { + {NULL, NULL, NULL, NULL, NULL, NULL}, + {0u, 0u, 0u, 0u, 0u, 0u}, + 0u}; + +/** + Create a new (empty) B-Tree. + + The given comparator must be a total ordering and is used to internally + organize the tree and look for values exactly. + + Searching can be done with a custom comparator that supports wildcards, see + zix_btree_lower_bound() for details. +*/ +ZIX_API +ZixBTree* +zix_btree_new(ZixComparator cmp, const void* cmp_data); + +/** + Free `t` and all the nodes it contains. + + @param destroy Function to call once for every value in the tree. This can + be used to free values if they are dynamically allocated. +*/ +ZIX_API +void +zix_btree_free(ZixBTree* t, ZixDestroyFunc destroy); + +/** + Clear everything from `t`, leaving it empty. + + @param destroy Function called exactly once for every value in the tree, + just before that value is removed from the tree. +*/ +ZIX_API +void +zix_btree_clear(ZixBTree* t, ZixDestroyFunc destroy); + +/// Return the number of elements in `t` +ZIX_PURE_API +size_t +zix_btree_size(const ZixBTree* t); + +/// Insert the element `e` into `t` +ZIX_API +ZixStatus +zix_btree_insert(ZixBTree* t, void* e); + +/** + Remove the value `e` from `t`. + + @param t Tree to remove from. + + @param e Value to remove. + + @param out Set to point to the removed pointer (which may not equal `e`). + + @param next On successful return, set to point at the value that immediately + followed `e`. +*/ +ZIX_API +ZixStatus +zix_btree_remove(ZixBTree* t, const void* e, void** out, ZixBTreeIter* next); + +/** + Set `ti` to an element exactly equal to `e` in `t`. + + If no such item exists, `ti` is set to the end. +*/ +ZIX_API +ZixStatus +zix_btree_find(const ZixBTree* t, const void* e, ZixBTreeIter* ti); + +/** + Set `ti` to the smallest element in `t` that is not less than `e`. + + The given comparator must be compatible with the tree comparator, that is, + any two values must have the same ordering according to both. Within this + constraint, it may implement fuzzier searching by handling special search + key values, for example with wildcards. + + If the search key `e` compares equal to many values in the tree, then `ti` + will be set to the least such element. + + The comparator is always called with an actual value in the tree as the + first argument, and `key` as the second argument. +*/ +ZIX_API +ZixStatus +zix_btree_lower_bound(const ZixBTree* t, + ZixComparator compare, + const void* compare_user_data, + const void* key, + ZixBTreeIter* ti); + +/// Return the data at the given position in the tree +ZIX_PURE_API +void* +zix_btree_get(ZixBTreeIter ti); + +/// Return an iterator to the first (smallest) element in `t` +ZIX_PURE_API +ZixBTreeIter +zix_btree_begin(const ZixBTree* t); + +/// Return an iterator to the end of `t` (one past the last element) +ZIX_CONST_API +ZixBTreeIter +zix_btree_end(const ZixBTree* t); + +/// Return true iff `lhs` is equal to `rhs` +ZIX_PURE_API +bool +zix_btree_iter_equals(ZixBTreeIter lhs, ZixBTreeIter rhs); + +/// Return true iff `i` is an iterator at the end of a tree +static inline bool +zix_btree_iter_is_end(const ZixBTreeIter i) +{ + return i.level == 0 && !i.nodes[0]; +} + +/// Increment `i` to point to the next element in the tree +ZIX_API +ZixStatus +zix_btree_iter_increment(ZixBTreeIter* i); + +/// Return an iterator one past `iter` +ZIX_PURE_API +ZixBTreeIter +zix_btree_iter_next(ZixBTreeIter iter); + +/** + @} + @} +*/ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_BTREE_H */ diff --git a/src/zix/common.h b/src/zix/common.h new file mode 100644 index 00000000..02f00b45 --- /dev/null +++ b/src/zix/common.h @@ -0,0 +1,137 @@ +/* + Copyright 2016-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_COMMON_H +#define ZIX_COMMON_H + +#include + +/** + @addtogroup zix + @{ +*/ + +/** @cond */ +#ifndef ZIX_API +# if defined(_WIN32) && !defined(ZIX_STATIC) && defined(ZIX_INTERNAL) +# define ZIX_API __declspec(dllexport) +# elif defined(_WIN32) && !defined(ZIX_STATIC) +# define ZIX_API __declspec(dllimport) +# elif defined(__GNUC__) +# define ZIX_API __attribute__((visibility("default"))) +# else +# define ZIX_API +# endif +#endif + +#ifdef __GNUC__ +# define ZIX_PURE_FUNC __attribute__((pure)) +# define ZIX_CONST_FUNC __attribute__((const)) +# define ZIX_MALLOC_FUNC __attribute__((malloc)) +#else +# define ZIX_PURE_FUNC +# define ZIX_CONST_FUNC +# define ZIX_MALLOC_FUNC +#endif + +#define ZIX_PURE_API \ + ZIX_API \ + ZIX_PURE_FUNC + +#define ZIX_CONST_API \ + ZIX_API \ + ZIX_CONST_FUNC + +#define ZIX_MALLOC_API \ + ZIX_API \ + ZIX_MALLOC_FUNC + +/** @endcond */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __GNUC__ +# define ZIX_LOG_FUNC(fmt, arg1) __attribute__((format(printf, fmt, arg1))) +#else +# define ZIX_LOG_FUNC(fmt, arg1) +#endif + +// Unused parameter macro to suppresses warnings and make it impossible to use +#if defined(__cplusplus) +# define ZIX_UNUSED(name) +#elif defined(__GNUC__) +# define ZIX_UNUSED(name) name##_unused __attribute__((__unused__)) +#else +# define ZIX_UNUSED(name) name +#endif + +typedef enum { + ZIX_STATUS_SUCCESS, + ZIX_STATUS_ERROR, + ZIX_STATUS_NO_MEM, + ZIX_STATUS_NOT_FOUND, + ZIX_STATUS_EXISTS, + ZIX_STATUS_BAD_ARG, + ZIX_STATUS_BAD_PERMS, + ZIX_STATUS_REACHED_END +} ZixStatus; + +static inline const char* +zix_strerror(const ZixStatus status) +{ + switch (status) { + case ZIX_STATUS_SUCCESS: + return "Success"; + case ZIX_STATUS_ERROR: + return "Unknown error"; + case ZIX_STATUS_NO_MEM: + return "Out of memory"; + case ZIX_STATUS_NOT_FOUND: + return "Not found"; + case ZIX_STATUS_EXISTS: + return "Exists"; + case ZIX_STATUS_BAD_ARG: + return "Bad argument"; + case ZIX_STATUS_BAD_PERMS: + return "Bad permissions"; + case ZIX_STATUS_REACHED_END: + return "Reached end"; + } + return "Unknown error"; +} + +/// Function for comparing two elements +typedef int (*ZixComparator)(const void* a, + const void* b, + const void* user_data); + +/// Function for testing equality of two elements +typedef bool (*ZixEqualFunc)(const void* a, const void* b); + +/// Function to destroy an element +typedef void (*ZixDestroyFunc)(void* ptr); + +/** + @} +*/ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_COMMON_H */ diff --git a/src/zix/digest.c b/src/zix/digest.c new file mode 100644 index 00000000..47d27b94 --- /dev/null +++ b/src/zix/digest.c @@ -0,0 +1,141 @@ +/* + Copyright 2012-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "zix/digest.h" + +#ifdef __SSE4_2__ +# include +#endif + +#include +#include + +#ifdef __SSE4_2__ + +// SSE 4.2 CRC32 + +uint32_t +zix_digest_start(void) +{ + return 1; +} + +uint32_t +zix_digest_add(uint32_t hash, const void* const buf, const size_t len) +{ + const uint8_t* str = (const uint8_t*)buf; + +# ifdef __x86_64__ + for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) { + hash = (uint32_t)_mm_crc32_u64(hash, *(const uint64_t*)str); + str += sizeof(uint64_t); + } + if (len & sizeof(uint32_t)) { + hash = _mm_crc32_u32(hash, *(const uint32_t*)str); + str += sizeof(uint32_t); + } +# else + for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) { + hash = _mm_crc32_u32(hash, *(const uint32_t*)str); + str += sizeof(uint32_t); + } +# endif + if (len & sizeof(uint16_t)) { + hash = _mm_crc32_u16(hash, *(const uint16_t*)str); + str += sizeof(uint16_t); + } + if (len & sizeof(uint8_t)) { + hash = _mm_crc32_u8(hash, *(const uint8_t*)str); + } + + return hash; +} + +uint32_t +zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len) +{ + assert((uintptr_t)buf % sizeof(uint64_t) == 0); + assert(len % sizeof(uint64_t) == 0); + +# ifdef __x86_64__ + const uint64_t* ptr = (const uint64_t*)buf; + + for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) { + hash = (uint32_t)_mm_crc32_u64(hash, *ptr); + ++ptr; + } + + return hash; +# else + const uint32_t* ptr = (const uint32_t*)buf; + + for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) { + hash = _mm_crc32_u32(hash, *ptr); + ++ptr; + } + + return hash; +# endif +} + +uint32_t +zix_digest_add_ptr(const uint32_t hash, const void* const ptr) +{ +# ifdef __x86_64__ + return (uint32_t)_mm_crc32_u64(hash, (uintptr_t)ptr); +# else + return _mm_crc32_u32(hash, (uintptr_t)ptr); +# endif +} + +#else + +// Classic DJB hash + +uint32_t +zix_digest_start(void) +{ + return 5381; +} + +uint32_t +zix_digest_add(uint32_t hash, const void* const buf, const size_t len) +{ + const uint8_t* str = (const uint8_t*)buf; + + for (size_t i = 0; i < len; ++i) { + hash = (hash << 5u) + hash + str[i]; + } + + return hash; +} + +uint32_t +zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len) +{ + assert((uintptr_t)buf % sizeof(uint64_t) == 0); + assert(len % sizeof(uint64_t) == 0); + + return zix_digest_add(hash, buf, len); +} + +uint32_t +zix_digest_add_ptr(const uint32_t hash, const void* const ptr) +{ + return zix_digest_add(hash, &ptr, sizeof(ptr)); +} + +#endif diff --git a/src/zix/digest.h b/src/zix/digest.h new file mode 100644 index 00000000..1fde77a9 --- /dev/null +++ b/src/zix/digest.h @@ -0,0 +1,67 @@ +/* + Copyright 2012-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_DIGEST_H +#define ZIX_DIGEST_H + +#include "zix/common.h" + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + Return an initial empty digest value. +*/ +ZIX_CONST_API +uint32_t +zix_digest_start(void); + +/** + Update `hash` to include `buf`, a buffer of `len` bytes. + + This can be used for any size or alignment. +*/ +ZIX_PURE_API +uint32_t +zix_digest_add(uint32_t hash, const void* buf, size_t len); + +/** + Update `hash` to include `buf`, a 64-bit aligned buffer of `len` bytes. + + Both `buf` and `len` must be evenly divisible by 8 (64 bits). +*/ +ZIX_PURE_API +uint32_t +zix_digest_add_64(uint32_t hash, const void* buf, size_t len); + +/** + Update `hash` to include `ptr`. + + This hashes the value of the pointer itself, and does not dereference `ptr`. +*/ +ZIX_CONST_API +uint32_t +zix_digest_add_ptr(uint32_t hash, const void* ptr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_DIGEST_H */ diff --git a/src/zix/hash.c b/src/zix/hash.c new file mode 100644 index 00000000..d9556edc --- /dev/null +++ b/src/zix/hash.c @@ -0,0 +1,230 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "zix/hash.h" + +#include +#include +#include + +/** + Primes, each slightly less than twice its predecessor, and as far away + from powers of two as possible. +*/ +static const unsigned sizes[] = { + 53, 97, 193, 389, 769, 1543, 3079, + 6151, 12289, 24593, 49157, 98317, 196613, 393241, + 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653, + 100663319, 201326611, 402653189, 805306457, 1610612741, 0}; + +typedef struct ZixHashEntry { + struct ZixHashEntry* next; ///< Next entry in bucket + uint32_t hash; ///< Non-modulo hash value + // Value follows here (access with zix_hash_value) +} ZixHashEntry; + +struct ZixHashImpl { + ZixHashFunc hash_func; + ZixEqualFunc equal_func; + ZixHashEntry** buckets; + const unsigned* n_buckets; + size_t value_size; + unsigned count; +}; + +static inline void* +zix_hash_value(ZixHashEntry* entry) +{ + return entry + 1; +} + +ZixHash* +zix_hash_new(ZixHashFunc hash_func, ZixEqualFunc equal_func, size_t value_size) +{ + ZixHash* hash = (ZixHash*)malloc(sizeof(ZixHash)); + if (hash) { + hash->hash_func = hash_func; + hash->equal_func = equal_func; + hash->n_buckets = &sizes[0]; + hash->value_size = value_size; + hash->count = 0; + if (!(hash->buckets = + (ZixHashEntry**)calloc(*hash->n_buckets, sizeof(ZixHashEntry*)))) { + free(hash); + return NULL; + } + } + return hash; +} + +void +zix_hash_free(ZixHash* hash) +{ + if (!hash) { + return; + } + + for (unsigned b = 0; b < *hash->n_buckets; ++b) { + ZixHashEntry* bucket = hash->buckets[b]; + for (ZixHashEntry* e = bucket; e;) { + ZixHashEntry* next = e->next; + free(e); + e = next; + } + } + + free(hash->buckets); + free(hash); +} + +size_t +zix_hash_size(const ZixHash* hash) +{ + return hash->count; +} + +static inline void +insert_entry(ZixHashEntry** bucket, ZixHashEntry* entry) +{ + entry->next = *bucket; + *bucket = entry; +} + +static inline ZixStatus +rehash(ZixHash* hash, unsigned new_n_buckets) +{ + ZixHashEntry** new_buckets = + (ZixHashEntry**)calloc(new_n_buckets, sizeof(ZixHashEntry*)); + if (!new_buckets) { + return ZIX_STATUS_NO_MEM; + } + + const unsigned old_n_buckets = *hash->n_buckets; + for (unsigned b = 0; b < old_n_buckets; ++b) { + for (ZixHashEntry* e = hash->buckets[b]; e;) { + ZixHashEntry* const next = e->next; + const unsigned h = e->hash % new_n_buckets; + insert_entry(&new_buckets[h], e); + e = next; + } + } + + free(hash->buckets); + hash->buckets = new_buckets; + + return ZIX_STATUS_SUCCESS; +} + +static inline ZixHashEntry* +find_entry(const ZixHash* hash, + const void* key, + const unsigned h, + const unsigned h_nomod) +{ + for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) { + if (e->hash == h_nomod && hash->equal_func(zix_hash_value(e), key)) { + return e; + } + } + return NULL; +} + +void* +zix_hash_find(const ZixHash* hash, const void* value) +{ + const unsigned h_nomod = hash->hash_func(value); + const unsigned h = h_nomod % *hash->n_buckets; + ZixHashEntry* const entry = find_entry(hash, value, h, h_nomod); + return entry ? zix_hash_value(entry) : 0; +} + +ZixStatus +zix_hash_insert(ZixHash* hash, const void* value, void** inserted) +{ + unsigned h_nomod = hash->hash_func(value); + unsigned h = h_nomod % *hash->n_buckets; + + ZixHashEntry* elem = find_entry(hash, value, h, h_nomod); + if (elem) { + assert(elem->hash == h_nomod); + if (inserted) { + *inserted = zix_hash_value(elem); + } + return ZIX_STATUS_EXISTS; + } + + elem = (ZixHashEntry*)malloc(sizeof(ZixHashEntry) + hash->value_size); + if (!elem) { + return ZIX_STATUS_NO_MEM; + } + elem->next = NULL; + elem->hash = h_nomod; + memcpy(elem + 1, value, hash->value_size); + + const unsigned next_n_buckets = *(hash->n_buckets + 1); + if (next_n_buckets != 0 && (hash->count + 1) >= next_n_buckets) { + if (!rehash(hash, next_n_buckets)) { + h = h_nomod % *(++hash->n_buckets); + } + } + + insert_entry(&hash->buckets[h], elem); + ++hash->count; + if (inserted) { + *inserted = zix_hash_value(elem); + } + return ZIX_STATUS_SUCCESS; +} + +ZixStatus +zix_hash_remove(ZixHash* hash, const void* value) +{ + const unsigned h_nomod = hash->hash_func(value); + const unsigned h = h_nomod % *hash->n_buckets; + + ZixHashEntry** next_ptr = &hash->buckets[h]; + for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) { + if (h_nomod == e->hash && hash->equal_func(zix_hash_value(e), value)) { + *next_ptr = e->next; + free(e); + --hash->count; + return ZIX_STATUS_SUCCESS; + } + next_ptr = &e->next; + } + + if (hash->n_buckets != sizes) { + const unsigned prev_n_buckets = *(hash->n_buckets - 1); + if (hash->count - 1 <= prev_n_buckets) { + if (!rehash(hash, prev_n_buckets)) { + --hash->n_buckets; + } + } + } + + return ZIX_STATUS_NOT_FOUND; +} + +void +zix_hash_foreach(ZixHash* hash, ZixHashVisitFunc f, void* user_data) +{ + for (unsigned b = 0; b < *hash->n_buckets; ++b) { + ZixHashEntry* bucket = hash->buckets[b]; + for (ZixHashEntry* e = bucket; e; e = e->next) { + f(zix_hash_value(e), user_data); + } + } +} diff --git a/src/zix/hash.h b/src/zix/hash.h new file mode 100644 index 00000000..39050381 --- /dev/null +++ b/src/zix/hash.h @@ -0,0 +1,138 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_HASH_H +#define ZIX_HASH_H + +#include "zix/common.h" + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @addtogroup zix + @{ + @name Hash + @{ +*/ + +typedef struct ZixHashImpl ZixHash; + +/** + Function for computing the hash of an element. +*/ +typedef uint32_t (*ZixHashFunc)(const void* value); + +/** + Function to visit a hash element. +*/ +typedef void (*ZixHashVisitFunc)(void* value, void* user_data); + +/** + Create a new hash table. + + To minimize space overhead, unlike many hash tables this stores a single + value, not a key and a value. Any size of value can be stored, but all the + values in the hash table must be the same size, and the values must be safe + to copy with memcpy. To get key:value behaviour, simply insert a struct + with a key and value into the hash. + + @param hash_func The hashing function. + @param equal_func A function to test value equality. + @param value_size The size of the values to be stored. +*/ +ZIX_API +ZixHash* +zix_hash_new(ZixHashFunc hash_func, ZixEqualFunc equal_func, size_t value_size); + +/** + Free `hash`. +*/ +ZIX_API +void +zix_hash_free(ZixHash* hash); + +/** + Return the number of elements in `hash`. +*/ +ZIX_PURE_API +size_t +zix_hash_size(const ZixHash* hash); + +/** + Insert an item into `hash`. + + If no matching value is found, ZIX_STATUS_SUCCESS will be returned, and @p + inserted will be pointed to the copy of `value` made in the new hash node. + + If a matching value already exists, ZIX_STATUS_EXISTS will be returned, and + `inserted` will be pointed to the existing value. + + @param hash The hash table. + @param value The value to be inserted. + @param inserted The copy of `value` in the hash table. + @return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS, or ZIX_STATUS_NO_MEM. +*/ +ZIX_API +ZixStatus +zix_hash_insert(ZixHash* hash, const void* value, void** inserted); + +/** + Remove an item from `hash`. + + @param hash The hash table. + @param value The value to remove. + @return ZIX_STATUS_SUCCES or ZIX_STATUS_NOT_FOUND. +*/ +ZIX_API +ZixStatus +zix_hash_remove(ZixHash* hash, const void* value); + +/** + Search for an item in `hash`. + + @param hash The hash table. + @param value The value to search for. +*/ +ZIX_API +void* +zix_hash_find(const ZixHash* hash, const void* value); + +/** + Call `f` on each value in `hash`. + + @param hash The hash table. + @param f The function to call on each value. + @param user_data The user_data parameter passed to `f`. +*/ +ZIX_API +void +zix_hash_foreach(ZixHash* hash, ZixHashVisitFunc f, void* user_data); + +/** + @} + @} +*/ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_HASH_H */ -- cgit v1.2.1