From da55a282f67e50c1b647274a6fe6836747b2fe2f Mon Sep 17 00:00:00 2001 From: David Robillard Date: Wed, 11 Nov 2020 21:49:13 +0100 Subject: Update BTree --- src/zix/btree.c | 290 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 194 insertions(+), 96 deletions(-) (limited to 'src/zix/btree.c') diff --git a/src/zix/btree.c b/src/zix/btree.c index 3dc2d57..ba2ad0d 100644 --- a/src/zix/btree.c +++ b/src/zix/btree.c @@ -1,5 +1,5 @@ /* - Copyright 2011-2019 David Robillard + Copyright 2011-2020 David Robillard Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above @@ -45,10 +45,23 @@ struct ZixBTreeNodeImpl { uint16_t is_leaf; uint16_t n_vals; // On 64-bit we rely on some padding here to get page-sized nodes - void* vals[ZIX_BTREE_INODE_VALS]; // ZIX_BTREE_LEAF_VALS for leaves - ZixBTreeNode* children[ZIX_BTREE_INODE_VALS + 1]; // Nonexistent for leaves + union { + struct { + void* vals[ZIX_BTREE_LEAF_VALS]; + } leaf; + + struct { + void* vals[ZIX_BTREE_INODE_VALS]; + ZixBTreeNode* children[ZIX_BTREE_INODE_VALS + 1]; + } inode; + } data; }; +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112l) || \ + (defined(__cplusplus) && __cplusplus >= 201103L) +static_assert(sizeof(ZixBTreeNode) == ZIX_BTREE_PAGE_SIZE, ""); +#endif + typedef struct { ZixBTreeNode* node; unsigned index; @@ -85,7 +98,7 @@ print_tree(const ZixBTreeNode* parent, const ZixBTreeNode* node, int level) print_node(node, ""); if (!node->is_leaf) { for (uint16_t i = 0; i < node->n_vals + 1; ++i) { - print_tree(node, node->children[i], level + 1); + print_tree(node, node->data.inode.children[i], level + 1); } } if (!parent) { @@ -99,7 +112,11 @@ print_tree(const ZixBTreeNode* parent, const ZixBTreeNode* node, int level) ZIX_PRIVATE ZixBTreeNode* zix_btree_node_new(const bool leaf) { +#if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112l) || \ + (defined(__cplusplus) && __cplusplus >= 201103L)) assert(sizeof(ZixBTreeNode) == ZIX_BTREE_PAGE_SIZE); +#endif + ZixBTreeNode* node = (ZixBTreeNode*)malloc(sizeof(ZixBTreeNode)); if (node) { node->is_leaf = leaf; @@ -108,6 +125,23 @@ zix_btree_node_new(const bool leaf) return node; } +ZIX_PRIVATE +void* +zix_btree_value(const ZixBTreeNode* const node, const unsigned i) +{ + assert(i < node->n_vals); + return node->is_leaf ? node->data.leaf.vals[i] : node->data.inode.vals[i]; +} + +ZIX_PRIVATE +ZixBTreeNode* +zix_btree_child(const ZixBTreeNode* const node, const unsigned i) +{ + assert(!node->is_leaf); + assert(i <= ZIX_BTREE_INODE_VALS); + return node->data.inode.children[i]; +} + ZixBTree* zix_btree_new(const ZixComparator cmp, const void* const cmp_data, @@ -133,14 +167,21 @@ ZIX_PRIVATE void zix_btree_free_rec(ZixBTree* const t, ZixBTreeNode* const n) { if (n) { - if (t->destroy) { - for (uint16_t i = 0; i < n->n_vals; ++i) { - t->destroy(n->vals[i]); + if (n->is_leaf) { + if (t->destroy) { + for (uint16_t i = 0; i < n->n_vals; ++i) { + t->destroy(n->data.leaf.vals[i]); + } } - } - if (!n->is_leaf) { + } else { + if (t->destroy) { + for (uint16_t i = 0; i < n->n_vals; ++i) { + t->destroy(n->data.inode.vals[i]); + } + } + for (uint16_t i = 0; i < n->n_vals + 1; ++i) { - zix_btree_free_rec(t, n->children[i]); + zix_btree_free_rec(t, zix_btree_child(n, i)); } } free(n); @@ -203,7 +244,7 @@ zix_btree_split_child(ZixBTreeNode* const n, assert(lhs->n_vals == zix_btree_max_vals(lhs)); assert(n->n_vals < ZIX_BTREE_INODE_VALS); assert(i < n->n_vals + 1U); - assert(n->children[i] == lhs); + assert(zix_btree_child(n, i) == lhs); const uint16_t max_n_vals = zix_btree_max_vals(lhs); ZixBTreeNode* rhs = zix_btree_node_new(lhs->is_leaf); @@ -215,23 +256,35 @@ zix_btree_split_child(ZixBTreeNode* const n, lhs->n_vals = max_n_vals / 2U; rhs->n_vals = (uint16_t)(max_n_vals - lhs->n_vals - 1); - // Copy large half of values from LHS to new RHS node - memcpy(rhs->vals, - lhs->vals + lhs->n_vals + 1, - rhs->n_vals * sizeof(void*)); - - // Copy large half of children from LHS to new RHS node - if (!lhs->is_leaf) { - memcpy(rhs->children, - lhs->children + lhs->n_vals + 1, + if (lhs->is_leaf) { + // Copy large half from LHS to new RHS node + memcpy(rhs->data.leaf.vals, + lhs->data.leaf.vals + lhs->n_vals + 1, + rhs->n_vals * sizeof(void*)); + + // Move middle value up to parent + zix_btree_ainsert(n->data.inode.vals, + n->n_vals, + i, + lhs->data.leaf.vals[lhs->n_vals]); + } else { + // Copy large half from LHS to new RHS node + memcpy(rhs->data.inode.vals, + lhs->data.inode.vals + lhs->n_vals + 1, + rhs->n_vals * sizeof(void*)); + memcpy(rhs->data.inode.children, + lhs->data.inode.children + lhs->n_vals + 1, (rhs->n_vals + 1U) * sizeof(ZixBTreeNode*)); - } - // Move middle value up to parent - zix_btree_ainsert(n->vals, n->n_vals, i, lhs->vals[lhs->n_vals]); + // Move middle value up to parent + zix_btree_ainsert(n->data.inode.vals, + n->n_vals, + i, + lhs->data.inode.vals[lhs->n_vals]); + } // Insert new RHS node in parent at position i - zix_btree_ainsert((void**)n->children, ++n->n_vals, i + 1U, rhs); + zix_btree_ainsert((void**)n->data.inode.children, ++n->n_vals, i + 1U, rhs); return rhs; } @@ -247,10 +300,11 @@ zix_btree_node_is_sorted_with_respect_to(const ZixBTree* const t, return true; } - int cmp = t->cmp(n->vals[0], e, t->cmp_data); + int cmp = t->cmp(zix_btree_value(n, 0), e, t->cmp_data); for (uint16_t i = 1; i < n->n_vals; ++i) { - const int next_cmp = t->cmp(n->vals[i], e, t->cmp_data); - if ((cmp >= 0 && next_cmp < 0)) { + const int next_cmp = t->cmp(zix_btree_value(n, i), e, t->cmp_data); + if ((cmp >= 0 && next_cmp < 0) || + (cmp > 0 && next_cmp <= 0)) { return false; } cmp = next_cmp; @@ -276,7 +330,7 @@ zix_btree_node_find(const ZixBTree* const t, while (len > 0) { const unsigned half = len >> 1U; const unsigned i = first + half; - const int cmp = t->cmp(n->vals[i], e, t->cmp_data); + const int cmp = t->cmp(zix_btree_value(n, i), e, t->cmp_data); if (cmp == 0) { *equal = true; len = half; // Keep searching for wildcard matches @@ -288,7 +342,7 @@ zix_btree_node_find(const ZixBTree* const t, len = half; } } - assert(!*equal || t->cmp(n->vals[first], e, t->cmp_data) == 0); + assert(!*equal || t->cmp(zix_btree_value(n, first), e, t->cmp_data) == 0); return first; } @@ -306,8 +360,8 @@ zix_btree_insert(ZixBTree* const t, void* const e) if (!(parent = zix_btree_node_new(false))) { return ZIX_STATUS_NO_MEM; } - t->root = parent; - parent->children[0] = n; + t->root = parent; + parent->data.inode.children[0] = n; ++t->height; } @@ -316,7 +370,7 @@ zix_btree_insert(ZixBTree* const t, void* const e) return ZIX_STATUS_NO_MEM; } - const int cmp = t->cmp(parent->vals[i], e, t->cmp_data); + const int cmp = t->cmp(parent->data.inode.vals[i], e, t->cmp_data); if (cmp == 0) { return ZIX_STATUS_EXISTS; } else if (cmp < 0) { @@ -326,7 +380,7 @@ zix_btree_insert(ZixBTree* const t, void* const e) } } - assert(!parent || parent->children[i] == n); + assert(!parent || zix_btree_child(parent, i) == n); bool equal = false; i = zix_btree_node_find(t, n, e, &equal); @@ -335,10 +389,10 @@ zix_btree_insert(ZixBTree* const t, void* const e) } else if (!n->is_leaf) { // Descend to child node left of value parent = n; - n = n->children[i]; + n = zix_btree_child(n, i); } else { // Insert into internal node - zix_btree_ainsert(n->vals, n->n_vals++, i, e); + zix_btree_ainsert(n->data.leaf.vals, n->n_vals++, i, e); break; } } @@ -382,20 +436,32 @@ zix_btree_node_is_minimal(ZixBTreeNode* const n) ZIX_PRIVATE ZixBTreeNode* zix_btree_rotate_left(ZixBTreeNode* const parent, const unsigned i) { - ZixBTreeNode* const lhs = parent->children[i]; - ZixBTreeNode* const rhs = parent->children[i + 1]; + ZixBTreeNode* const lhs = zix_btree_child(parent, i); + ZixBTreeNode* const rhs = zix_btree_child(parent, i + 1); - // Move parent value to end of LHS - lhs->vals[lhs->n_vals++] = parent->vals[i]; + assert(lhs->is_leaf == rhs->is_leaf); + + if (lhs->is_leaf) { + // Move parent value to end of LHS + lhs->data.leaf.vals[lhs->n_vals++] = parent->data.inode.vals[i]; - // Move first child pointer from RHS to end of LHS - if (!lhs->is_leaf) { - lhs->children[lhs->n_vals] = (ZixBTreeNode*)zix_btree_aerase( - (void**)rhs->children, rhs->n_vals, 0); + // Move first value in RHS to parent + parent->data.inode.vals[i] = + zix_btree_aerase(rhs->data.leaf.vals, rhs->n_vals, 0); + } else { + // Move parent value to end of LHS + lhs->data.inode.vals[lhs->n_vals++] = parent->data.inode.vals[i]; + + // Move first value in RHS to parent + parent->data.inode.vals[i] = + zix_btree_aerase(rhs->data.inode.vals, rhs->n_vals, 0); + + // Move first child pointer from RHS to end of LHS + lhs->data.inode.children[lhs->n_vals] = (ZixBTreeNode*)zix_btree_aerase( + (void**)rhs->data.inode.children, rhs->n_vals, 0); } - // Move first value in RHS to parent - parent->vals[i] = zix_btree_aerase(rhs->vals, --rhs->n_vals, 0); + --rhs->n_vals; return lhs; } @@ -404,22 +470,36 @@ zix_btree_rotate_left(ZixBTreeNode* const parent, const unsigned i) ZIX_PRIVATE ZixBTreeNode* zix_btree_rotate_right(ZixBTreeNode* const parent, const unsigned i) { - ZixBTreeNode* const lhs = parent->children[i - 1]; - ZixBTreeNode* const rhs = parent->children[i]; + ZixBTreeNode* const lhs = zix_btree_child(parent, i - 1); + ZixBTreeNode* const rhs = zix_btree_child(parent, i); - // Prepend parent value to RHS - zix_btree_ainsert(rhs->vals, rhs->n_vals++, 0, parent->vals[i - 1]); + assert(lhs->is_leaf == rhs->is_leaf); + + if (lhs->is_leaf) { + // Prepend parent value to RHS + zix_btree_ainsert(rhs->data.leaf.vals, + rhs->n_vals++, + 0, + parent->data.inode.vals[i - 1]); - // Move last child pointer from LHS and prepend to RHS - if (!lhs->is_leaf) { - zix_btree_ainsert((void**)rhs->children, + // Move last value from LHS to parent + parent->data.inode.vals[i - 1] = lhs->data.leaf.vals[--lhs->n_vals]; + } else { + // Prepend parent value to RHS + zix_btree_ainsert(rhs->data.inode.vals, + rhs->n_vals++, + 0, + parent->data.inode.vals[i - 1]); + + // Move last child pointer from LHS and prepend to RHS + zix_btree_ainsert((void**)rhs->data.inode.children, rhs->n_vals, 0, - lhs->children[lhs->n_vals]); - } + lhs->data.inode.children[lhs->n_vals]); - // Move last value from LHS to parent - parent->vals[i - 1] = lhs->vals[--lhs->n_vals]; + // Move last value from LHS to parent + parent->data.inode.vals[i - 1] = lhs->data.inode.vals[--lhs->n_vals]; + } return rhs; } @@ -428,25 +508,39 @@ zix_btree_rotate_right(ZixBTreeNode* const parent, const unsigned i) ZIX_PRIVATE ZixBTreeNode* zix_btree_merge(ZixBTree* const t, ZixBTreeNode* const n, const unsigned i) { - ZixBTreeNode* const lhs = n->children[i]; - ZixBTreeNode* const rhs = n->children[i + 1]; + ZixBTreeNode* const lhs = zix_btree_child(n, i); + ZixBTreeNode* const rhs = zix_btree_child(n, i + 1); - assert(zix_btree_node_is_minimal(n->children[i])); + assert(lhs->is_leaf == rhs->is_leaf); + assert(zix_btree_node_is_minimal(lhs)); assert(lhs->n_vals + rhs->n_vals < zix_btree_max_vals(lhs)); // Move parent value to end of LHS - lhs->vals[lhs->n_vals++] = zix_btree_aerase(n->vals, n->n_vals, i); + if (lhs->is_leaf) { + lhs->data.leaf.vals[lhs->n_vals++] = + zix_btree_aerase(n->data.inode.vals, n->n_vals, i); + } else { + lhs->data.inode.vals[lhs->n_vals++] = + zix_btree_aerase(n->data.inode.vals, n->n_vals, i); + } // Erase corresponding child pointer (to RHS) in parent - zix_btree_aerase((void**)n->children, n->n_vals, i + 1U); + zix_btree_aerase((void**)n->data.inode.children, n->n_vals, i + 1U); // Add everything from RHS to end of LHS - memcpy(lhs->vals + lhs->n_vals, rhs->vals, rhs->n_vals * sizeof(void*)); - if (!lhs->is_leaf) { - memcpy(lhs->children + lhs->n_vals, - rhs->children, + if (lhs->is_leaf) { + memcpy(lhs->data.leaf.vals + lhs->n_vals, + rhs->data.leaf.vals, + rhs->n_vals * sizeof(void*)); + } else { + memcpy(lhs->data.inode.vals + lhs->n_vals, + rhs->data.inode.vals, + rhs->n_vals * sizeof(void*)); + memcpy(lhs->data.inode.children + lhs->n_vals, + rhs->data.inode.children, (rhs->n_vals + 1U) * sizeof(void*)); } + lhs->n_vals = (uint16_t)(lhs->n_vals + rhs->n_vals); if (--n->n_vals == 0) { @@ -465,9 +559,9 @@ ZIX_PRIVATE void* zix_btree_remove_min(ZixBTree* const t, ZixBTreeNode* n) { while (!n->is_leaf) { - if (zix_btree_node_is_minimal(n->children[0])) { + if (zix_btree_node_is_minimal(zix_btree_child(n, 0))) { // Leftmost child is minimal, must expand - if (!zix_btree_node_is_minimal(n->children[1])) { + if (!zix_btree_node_is_minimal(zix_btree_child(n, 1))) { // Child's right sibling has at least one key to steal n = zix_btree_rotate_left(n, 0); } else { @@ -475,11 +569,11 @@ zix_btree_remove_min(ZixBTree* const t, ZixBTreeNode* n) n = zix_btree_merge(t, n, 0); } } else { - n = n->children[0]; + n = zix_btree_child(n, 0); } } - return zix_btree_aerase(n->vals, --n->n_vals, 0); + return zix_btree_aerase(n->data.leaf.vals, --n->n_vals, 0); } /** Remove and return the max value from the subtree rooted at `n`. */ @@ -487,9 +581,9 @@ ZIX_PRIVATE void* zix_btree_remove_max(ZixBTree* const t, ZixBTreeNode* n) { while (!n->is_leaf) { - if (zix_btree_node_is_minimal(n->children[n->n_vals])) { + if (zix_btree_node_is_minimal(zix_btree_child(n, n->n_vals))) { // Leftmost child is minimal, must expand - if (!zix_btree_node_is_minimal(n->children[n->n_vals - 1])) { + if (!zix_btree_node_is_minimal(zix_btree_child(n, n->n_vals - 1))) { // Child's left sibling has at least one key to steal n = zix_btree_rotate_right(n, n->n_vals); } else { @@ -497,11 +591,11 @@ zix_btree_remove_max(ZixBTree* const t, ZixBTreeNode* n) n = zix_btree_merge(t, n, n->n_vals - 1U); } } else { - n = n->children[n->n_vals]; + n = zix_btree_child(n, n->n_vals); } } - return n->vals[--n->n_vals]; + return n->data.leaf.vals[--n->n_vals]; } ZixStatus @@ -534,7 +628,7 @@ zix_btree_remove(ZixBTree* const t, if (n->is_leaf) { if (equal) { // Found in leaf node - *out = zix_btree_aerase(n->vals, --n->n_vals, i); + *out = zix_btree_aerase(n->data.leaf.vals, --n->n_vals, i); if (ti && i == n->n_vals) { if (i == 0) { ti->level = 0; @@ -556,42 +650,46 @@ zix_btree_remove(ZixBTree* const t, } } else if (equal) { // Found in internal node - const size_t l_size = n->children[i]->n_vals; - const size_t r_size = n->children[i + 1]->n_vals; - if (zix_btree_node_is_minimal(n->children[i]) && - zix_btree_node_is_minimal(n->children[i + 1])) { + ZixBTreeNode* const lhs = zix_btree_child(n, i); + ZixBTreeNode* const rhs = zix_btree_child(n, i + 1); + const size_t l_size = lhs->n_vals; + const size_t r_size = rhs->n_vals; + if (zix_btree_node_is_minimal(lhs) && + zix_btree_node_is_minimal(rhs)) { // Both preceding and succeeding child are minimal n = zix_btree_merge(t, n, i); } else if (l_size >= r_size) { // Left child can remove without merge - assert(!zix_btree_node_is_minimal(n->children[i])); - *out = n->vals[i]; - n->vals[i] = zix_btree_remove_max(t, n->children[i]); + assert(!zix_btree_node_is_minimal(lhs)); + *out = n->data.inode.vals[i]; + n->data.inode.vals[i] = zix_btree_remove_max(t, lhs); --t->size; return ZIX_STATUS_SUCCESS; } else { // Right child can remove without merge - assert(!zix_btree_node_is_minimal(n->children[i + 1])); - *out = n->vals[i]; - n->vals[i] = zix_btree_remove_min(t, n->children[i + 1]); + assert(!zix_btree_node_is_minimal(rhs)); + *out = n->data.inode.vals[i]; + n->data.inode.vals[i] = zix_btree_remove_min(t, rhs); --t->size; return ZIX_STATUS_SUCCESS; } } else { // Not found in internal node, key is in/under children[i] - if (zix_btree_node_is_minimal(n->children[i])) { - if (i > 0 && !zix_btree_node_is_minimal(n->children[i - 1])) { + if (zix_btree_node_is_minimal(zix_btree_child(n, i))) { + if (i > 0 && + !zix_btree_node_is_minimal(zix_btree_child(n, i - 1))) { // Steal a key from child's left sibling n = zix_btree_rotate_right(n, i); } else if (i < n->n_vals && - !zix_btree_node_is_minimal(n->children[i + 1])) { + !zix_btree_node_is_minimal( + zix_btree_child(n, i + 1))) { // Steal a key from child's right sibling n = zix_btree_rotate_left(n, i); } else if (n == t->root && n->n_vals == 1) { // Root has two children, both minimal, delete it assert(i == 0 || i == 1); - const uint16_t counts[2] = {n->children[0]->n_vals, - n->children[1]->n_vals}; + const uint16_t counts[2] = {zix_btree_child(n, 0)->n_vals, + zix_btree_child(n, 1)->n_vals}; n = zix_btree_merge(t, n, 0); if (ti) { @@ -610,7 +708,7 @@ zix_btree_remove(ZixBTree* const t, } } } else { - n = n->children[i]; + n = zix_btree_child(n, i); } } if (ti) { @@ -644,7 +742,7 @@ zix_btree_find(const ZixBTree* const t, break; } else { ++(*ti)->level; - n = n->children[i]; + n = zix_btree_child(n, i); } } @@ -688,7 +786,7 @@ zix_btree_lower_bound(const ZixBTree* const t, break; } else { ++(*ti)->level; - n = n->children[i]; + n = zix_btree_child(n, i); assert(n); } } @@ -715,7 +813,7 @@ zix_btree_get(const ZixBTreeIter* const ti) const ZixBTreeIterFrame* const frame = &ti->stack[ti->level]; assert(frame->node); assert(frame->index < frame->node->n_vals); - return frame->node->vals[frame->index]; + return zix_btree_value(frame->node, frame->index); } ZixBTreeIter* @@ -732,7 +830,7 @@ zix_btree_begin(const ZixBTree* const t) i->stack[0].node = n; i->stack[0].index = 0; while (!n->is_leaf) { - n = n->children[0]; + n = zix_btree_child(n, 0); ++i->level; i->stack[i->level].node = n; i->stack[i->level].index = 0; @@ -809,7 +907,7 @@ zix_btree_iter_increment(ZixBTreeIter* const i) } else { // Internal node, move down to next child assert(f->index < f->node->n_vals); - ZixBTreeNode* child = f->node->children[++f->index]; + ZixBTreeNode* child = zix_btree_child(f->node, ++f->index); f = &i->stack[++i->level]; f->node = child; @@ -817,7 +915,7 @@ zix_btree_iter_increment(ZixBTreeIter* const i) // Move down and left until we hit a leaf while (!f->node->is_leaf) { - child = f->node->children[0]; + child = zix_btree_child(f->node, 0); f = &i->stack[++i->level]; f->node = child; f->index = 0; -- cgit v1.2.1