From aa7e97885285fc79b2bf35941dd428c7515c13bb Mon Sep 17 00:00:00 2001 From: David Robillard Date: Wed, 24 Sep 2014 23:47:11 +0000 Subject: Add sord_erase() for erasing statements via an iterator. git-svn-id: http://svn.drobilla.net/sord/trunk@310 3d64ff67-21c5-427c-a301-fe4f08042e5a --- NEWS | 5 +- sord/sord.h | 17 ++++- src/sord.c | 58 ++++++++++++++--- src/sord_test.c | 42 ++++++++++-- src/zix/btree.c | 194 ++++++++++++++++++++++++++++++++------------------------ src/zix/btree.h | 10 ++- wscript | 2 +- 7 files changed, 225 insertions(+), 103 deletions(-) diff --git a/NEWS b/NEWS index e15b946..02519b5 100644 --- a/NEWS +++ b/NEWS @@ -1,8 +1,9 @@ -sord (0.12.3) unstable; +sord (0.13.0) unstable; * Reduce memory usage and increase performance with a better data structure + * Add sord_erase() for erasing statements via an iterator. - -- David Robillard Tue, 23 Sep 2014 00:27:22 -0400 + -- David Robillard Wed, 24 Sep 2014 14:58:49 -0400 sord (0.12.2) stable; diff --git a/sord/sord.h b/sord/sord.h index 5ff9a7b..fda754e 100644 --- a/sord/sord.h +++ b/sord/sord.h @@ -443,6 +443,8 @@ sord_contains(SordModel* model, const SordQuad pat); /** Add a quad to a model. + + Calling this function invalidates all iterators on `model`. */ SORD_API bool @@ -451,12 +453,25 @@ sord_add(SordModel* model, const SordQuad quad); /** Remove a quad from a model. - Note that is it illegal to remove while iterating over `model`. + Calling this function invalidates all iterators on `model`. To remove quads + while iterating, use sord_erase() instead. */ SORD_API void sord_remove(SordModel* model, const SordQuad quad); +/** + Remove a quad from a model via an iterator. + + Calling this function invalidates all iterators on `model` except `iter`. + + @param iter Iterator to the element to erase, which is incremented to the + next value on return. +*/ +SORD_API +SerdStatus +sord_erase(SordModel* model, SordIter* iter); + /** @} @name Inserter diff --git a/src/sord.c b/src/sord.c index 9b4a11a..509135f 100644 --- a/src/sord.c +++ b/src/sord.c @@ -119,6 +119,7 @@ struct SordModelImpl { ZixBTree* indices[NUM_ORDERS]; size_t n_quads; + size_t n_iters; }; /** Mode for searching or iteration */ @@ -135,7 +136,7 @@ struct SordIterImpl { const SordModel* sord; ///< Model being iterated over ZixBTreeIter* cur; ///< Current DB cursor SordQuad pat; ///< Pattern (in ordering order) - int ordering[TUP_LEN]; ///< Store ordering + SordOrder order; ///< Store order (which index) SearchMode mode; ///< Iteration mode int n_prefix; ///< Prefix for RANGE and FILTER_RANGE bool end; ///< True iff reached end @@ -370,7 +371,7 @@ sord_iter_seek_match_range(SordIter* iter) return false; // Found match for (int i = 0; i < iter->n_prefix; ++i) { - const int idx = iter->ordering[i]; + const int idx = orderings[iter->order][i]; if (!sord_id_match(key[idx], iter->pat[idx])) { iter->end = true; // Reached end of valid range return true; @@ -385,18 +386,16 @@ static SordIter* sord_iter_new(const SordModel* sord, ZixBTreeIter* cur, const SordQuad pat, SordOrder order, SearchMode mode, int n_prefix) { - const int* ordering = orderings[order]; - SordIter* iter = (SordIter*)malloc(sizeof(SordIter)); iter->sord = sord; iter->cur = cur; + iter->order = order; iter->mode = mode; iter->n_prefix = n_prefix; iter->end = false; iter->skip_graphs = order < GSPO; for (int i = 0; i < TUP_LEN; ++i) { - iter->pat[i] = pat[i]; - iter->ordering[i] = ordering[i]; + iter->pat[i] = pat[i]; } switch (iter->mode) { @@ -422,6 +421,8 @@ sord_iter_new(const SordModel* sord, ZixBTreeIter* cur, const SordQuad pat, (void*)iter, TUP_FMT_ARGS(pat), TUP_FMT_ARGS(value), iter->end, iter->skip_graphs); #endif + + ++((SordModel*)sord)->n_iters; return iter; } @@ -469,7 +470,7 @@ sord_iter_next(SordIter* iter) key = (const SordNode**)zix_btree_get(iter->cur); assert(key); for (int i = 0; i < iter->n_prefix; ++i) { - const int idx = iter->ordering[i]; + const int idx = orderings[iter->order][i]; if (!sord_id_match(key[idx], iter->pat[idx])) { iter->end = true; SORD_ITER_LOG("%p reached non-match end\n", (void*)iter); @@ -515,6 +516,7 @@ sord_iter_free(SordIter* iter) { SORD_ITER_LOG("%p Free\n", (void*)iter); if (iter) { + --((SordModel*)iter->sord)->n_iters; zix_btree_iter_free(iter->cur); free(iter); } @@ -632,6 +634,7 @@ sord_new(SordWorld* world, unsigned indices, bool graphs) SordModel* sord = (SordModel*)malloc(sizeof(struct SordModelImpl)); sord->world = world; sord->n_quads = 0; + sord->n_iters = 0; for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) { const int* const ordering = orderings[i]; @@ -767,7 +770,7 @@ sord_begin(const SordModel* sord) return NULL; } else { ZixBTreeIter* cur = zix_btree_begin(sord->indices[DEFAULT_ORDER]); - SordQuad pat = { 0, 0, 0, 0 }; + SordQuad pat = { 0, 0, 0, 0 }; return sord_iter_new(sord, cur, pat, DEFAULT_ORDER, ALL, 0); } } @@ -1183,6 +1186,8 @@ sord_add(SordModel* sord, const SordQuad tup) error(sord->world, SERD_ERR_BAD_ARG, "attempt to add quad with NULL field\n"); return false; + } else if (sord->n_iters > 0) { + error(sord->world, SERD_ERR_BAD_ARG, "added tuple during iteration\n"); } const SordNode** quad = (const SordNode**)malloc(sizeof(SordQuad)); @@ -1209,11 +1214,14 @@ void sord_remove(SordModel* sord, const SordQuad tup) { SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup)); + if (sord->n_iters > 0) { + error(sord->world, SERD_ERR_BAD_ARG, "remove with iterator\n"); + } SordNode* quad = NULL; for (unsigned i = 0; i < NUM_ORDERS; ++i) { if (sord->indices[i]) { - if (zix_btree_remove(sord->indices[i], tup, (void**)&quad)) { + if (zix_btree_remove(sord->indices[i], tup, (void**)&quad, NULL)) { assert(i == 0); // Assuming index coherency return; // Quad not found, do nothing } @@ -1227,3 +1235,35 @@ sord_remove(SordModel* sord, const SordQuad tup) --sord->n_quads; } + +SerdStatus +sord_erase(SordModel* sord, SordIter* iter) +{ + if (sord->n_iters > 1) { + error(sord->world, SERD_ERR_BAD_ARG, "erased with many iterators\n"); + } + + SordQuad tup; + sord_iter_get(iter, tup); + + SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup)); + + SordNode* quad = NULL; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (sord->indices[i]) { + if (zix_btree_remove(sord->indices[i], tup, (void**)&quad, + i == iter->order ? &iter->cur : NULL)) { + return (i == 0) ? SERD_ERR_NOT_FOUND : SERD_ERR_INTERNAL; + } + } + } + iter->end = zix_btree_iter_is_end(iter->cur); + + free(quad); + + for (int i = 0; i < TUP_LEN; ++i) + sord_drop_quad_ref(sord, tup[i], (SordQuadIndex)i); + + --sord->n_quads; + return SERD_SUCCESS; +} diff --git a/src/sord_test.c b/src/sord_test.c index d00fc8a..f52cc01 100644 --- a/src/sord_test.c +++ b/src/sord_test.c @@ -73,7 +73,7 @@ generate(SordWorld* world, } for (unsigned j = 0; j < n_objects_per; ++j) { - SordQuad tup = { ids[0], ids[1], ids[2 + j] }; + SordQuad tup = { ids[0], ids[1], ids[2 + j], graph }; if (!sord_add(sord, tup)) { return test_fail("Fail: Failed to add quad\n"); } @@ -91,7 +91,7 @@ generate(SordWorld* world, tup[0] = uri(world, 98); tup[1] = uri(world, 4); tup[2] = sord_new_literal(world, 0, USTR("hello"), NULL); - tup[3] = 0; + tup[3] = graph; sord_add(sord, tup); sord_node_free(world, (SordNode*)tup[2]); tup[2] = sord_new_literal(world, uri(world, 5), USTR("hello"), NULL); @@ -103,7 +103,7 @@ generate(SordWorld* world, tup[0] = uri(world, 96); tup[1] = uri(world, 4); tup[2] = sord_new_literal(world, uri(world, 4), USTR("hello"), NULL); - tup[3] = 0; + tup[3] = graph; sord_add(sord, tup); sord_node_free(world, (SordNode*)tup[2]); tup[2] = sord_new_literal(world, uri(world, 5), USTR("hello"), NULL); @@ -115,7 +115,7 @@ generate(SordWorld* world, tup[0] = uri(world, 94); tup[1] = uri(world, 5); tup[2] = sord_new_literal(world, 0, USTR("hello"), NULL); - tup[3] = 0; + tup[3] = graph; sord_add(sord, tup); sord_node_free(world, (SordNode*)tup[2]); tup[2] = sord_new_literal(world, NULL, USTR("hello"), "en-gb"); @@ -127,7 +127,7 @@ generate(SordWorld* world, tup[0] = uri(world, 92); tup[1] = uri(world, 6); tup[2] = sord_new_literal(world, 0, USTR("hello"), "en-us"); - tup[3] = 0; + tup[3] = graph; sord_add(sord, tup); sord_node_free(world, (SordNode*)tup[2]); tup[2] = sord_new_literal(world, NULL, USTR("hello"), "en-gb"); @@ -518,7 +518,7 @@ main(int argc, char** argv) } // Test removing - sord = sord_new(world, SORD_SPO, false); + sord = sord_new(world, SORD_SPO, true); tup[0] = uri(world, 1); tup[1] = uri(world, 2); tup[2] = sord_new_literal(world, 0, USTR("hello"), NULL); @@ -545,6 +545,36 @@ main(int argc, char** argv) } sord_iter_free(iter); + // Load a couple graphs + SordNode* graph42 = uri(world, 42); + SordNode* graph43 = uri(world, 43); + generate(world, sord, 1, graph42); + generate(world, sord, 1, graph43); + + // Remove one graph via iterator + SerdStatus st; + iter = sord_search(sord, NULL, NULL, NULL, graph43); + while (!sord_iter_end(iter)) { + if ((st = sord_erase(sord, iter))) { + fprintf(stderr, "Remove by iterator failed (%s)\n", + serd_strerror(st)); + goto fail; + } + } + sord_iter_free(iter); + + // Ensure only the other graph is left + SordQuad quad; + SordQuad pat = { 0, 0, 0, graph42 }; + for (iter = sord_begin(sord); !sord_iter_end(iter); sord_iter_next(iter)) { + sord_iter_get(iter, quad); + if (!sord_quad_match(quad, pat)) { + fprintf(stderr, "Graph removal via iteration failed\n"); + goto fail; + } + } + sord_iter_free(iter); + sord_free(sord); sord_world_free(world); diff --git a/src/zix/btree.c b/src/zix/btree.c index b03a6c5..e7912d0 100644 --- a/src/zix/btree.c +++ b/src/zix/btree.c @@ -317,6 +317,28 @@ zix_btree_insert(ZixBTree* const t, void* const e) return ZIX_STATUS_SUCCESS; } +ZIX_PRIVATE ZixBTreeIter* +zix_btree_iter_new(const ZixBTree* const t) +{ + const size_t s = t->height * sizeof(ZixBTreeIterFrame); + ZixBTreeIter* const i = (ZixBTreeIter*)malloc(sizeof(ZixBTreeIter) + s); + if (i) { + i->level = 0; + } + return i; +} + +ZIX_PRIVATE void +zix_btree_iter_set_frame(ZixBTreeIter* const ti, + ZixBTreeNode* const n, + const uint16_t i) +{ + if (ti) { + ti->stack[ti->level].node = n; + ti->stack[ti->level].index = i; + } +} + ZIX_PRIVATE bool zix_btree_node_is_minimal(ZixBTreeNode* const n) { @@ -450,67 +472,100 @@ zix_btree_remove_max(ZixBTree* const t, ZixBTreeNode* n) return n->vals[--n->n_vals]; } -ZIX_PRIVATE ZixStatus -zix_btree_remove_rec(ZixBTree* const t, - ZixBTreeNode* const n, - const void* const e, - void** const removed) +ZIX_API ZixStatus +zix_btree_remove(ZixBTree* const t, + const void* const e, + void** const out, + ZixBTreeIter** const next) { - /* To remove in a single walk down, the tree is adjusted along the way so - that the current node always has at least one more value than the - minimum required in general. Thus, there is always room to remove - without adjusting on the way back up. */ - assert(n == t->root || !zix_btree_node_is_minimal(n)); + ZixBTreeNode* n = t->root; + ZixBTreeIter* ti = NULL; + const bool user_iter = next && *next; + if (next) { + if (!*next && !(*next = zix_btree_iter_new(t))) { + return ZIX_STATUS_NO_MEM; + } + ti = *next; + ti->level = 0; + } - bool equal = false; - const uint16_t i = zix_btree_node_find(t, n, e, &equal); + while (true) { + /* To remove in a single walk down, the tree is adjusted along the way + so that the current node always has at least one more value than the + minimum required in general. Thus, there is always room to remove + without adjusting on the way back up. */ + assert(n == t->root || !zix_btree_node_is_minimal(n)); - if (n->is_leaf) { - if (equal) { - // Found in leaf node - *removed = zix_btree_aerase(n->vals, --n->n_vals, i); - return ZIX_STATUS_SUCCESS; - } else { - // Not found in leaf node, or tree - return ZIX_STATUS_NOT_FOUND; - } - } else if (equal) { - // Found in internal node - if (!zix_btree_node_is_minimal(n->children[i])) { - // Left child can remove without merge - *removed = n->vals[i]; - n->vals[i] = zix_btree_remove_max(t, n->children[i]); - return ZIX_STATUS_SUCCESS; - } else if (!zix_btree_node_is_minimal(n->children[i + 1])) { - // Right child can remove without merge - *removed = n->vals[i]; - n->vals[i] = zix_btree_remove_min(t, n->children[i + 1]); - return ZIX_STATUS_SUCCESS; - } else { - // Both preceding and succeeding child are minimal - ZixBTreeNode* const merged = zix_btree_merge(t, n, i); - return zix_btree_remove_rec(t, merged, e, removed); - } - } else { - // Not found in internal node, key is in/under children[i] - if (zix_btree_node_is_minimal(n->children[i])) { - if (i > 0 && !zix_btree_node_is_minimal(n->children[i - 1])) { - // Child's left sibling has at least one key to steal - ZixBTreeNode* const rhs = zix_btree_rotate_right(n, i); - return zix_btree_remove_rec(t, rhs, e, removed); - } else if (i < n->n_vals && - !zix_btree_node_is_minimal(n->children[i + 1])) { - // Child's right sibling has at least one key to steal - ZixBTreeNode* const lhs = zix_btree_rotate_left(n, i); - return zix_btree_remove_rec(t, lhs, e, removed); + bool equal = false; + const uint16_t i = zix_btree_node_find(t, n, e, &equal); + zix_btree_iter_set_frame(ti, n, i); + if (n->is_leaf) { + if (equal) { + // Found in leaf node + *out = zix_btree_aerase(n->vals, --n->n_vals, i); + if (ti && i == n->n_vals) { + if (i == 0) { + ti->stack[ti->level = 0].node = NULL; + } else { + --ti->stack[ti->level].index; + zix_btree_iter_increment(ti); + } + } + --t->size; + return ZIX_STATUS_SUCCESS; + } else { + // Not found in leaf node, or tree + if (ti && !user_iter) { + zix_btree_iter_free(ti); + *next = NULL; + } + return ZIX_STATUS_NOT_FOUND; + } + } else if (equal) { + // Found in internal node + if (!zix_btree_node_is_minimal(n->children[i])) { + // Left child can remove without merge + *out = n->vals[i]; + n->vals[i] = zix_btree_remove_max(t, n->children[i]); + --t->size; + return ZIX_STATUS_SUCCESS; + } else if (!zix_btree_node_is_minimal(n->children[i + 1])) { + // Right child can remove without merge + *out = n->vals[i]; + n->vals[i] = zix_btree_remove_min(t, n->children[i + 1]); + --t->size; + return ZIX_STATUS_SUCCESS; } else { - // Both child's siblings are minimal, merge them - const uint16_t m = (i < n->n_vals) ? i : i - 1; - ZixBTreeNode* const merged = zix_btree_merge(t, n, m); - return zix_btree_remove_rec(t, merged, e, removed); + // Both preceding and succeeding child are minimal + n = zix_btree_merge(t, n, i); } } else { - return zix_btree_remove_rec(t, n->children[i], e, removed); + // Not found in internal node, key is in/under children[i] + if (zix_btree_node_is_minimal(n->children[i])) { + if (i > 0 && !zix_btree_node_is_minimal(n->children[i - 1])) { + // Steal a key from child's left sibling + n = zix_btree_rotate_right(n, i); + } else if (i < n->n_vals && + !zix_btree_node_is_minimal(n->children[i + 1])) { + // Steal a key from child's right sibling + n = zix_btree_rotate_left(n, i); + } else { + // Both child's siblings are minimal, merge them + if (i < n->n_vals) { + n = zix_btree_merge(t, n, i); + } else { + n = zix_btree_merge(t, n, i - 1); + if (ti) { + --ti->stack[ti->level].index; + } + } + } + } else { + n = n->children[i]; + } + } + if (ti) { + ++ti->level; } } @@ -518,27 +573,6 @@ zix_btree_remove_rec(ZixBTree* const t, return ZIX_STATUS_ERROR; } -ZIX_API ZixStatus -zix_btree_remove(ZixBTree* const t, const void* const e, void** const removed) -{ - const ZixStatus st = zix_btree_remove_rec(t, t->root, e, removed); - if (!st) { - --t->size; - } - return st; -} - -ZIX_PRIVATE ZixBTreeIter* -zix_btree_iter_new(const ZixBTree* const t) -{ - const size_t s = t->height * sizeof(ZixBTreeIterFrame); - ZixBTreeIter* const i = (ZixBTreeIter*)malloc(sizeof(ZixBTreeIter) + s); - if (i) { - i->level = 0; - } - return i; -} - ZIX_API ZixStatus zix_btree_find(const ZixBTree* const t, const void* const e, @@ -553,9 +587,7 @@ zix_btree_find(const ZixBTree* const t, bool equal = false; const uint16_t i = zix_btree_node_find(t, n, e, &equal); - // Update iterator stack - (*ti)->stack[(*ti)->level].node = n; - (*ti)->stack[(*ti)->level].index = i; + zix_btree_iter_set_frame(*ti, n, i); if (equal) { return ZIX_STATUS_SUCCESS; @@ -588,9 +620,7 @@ zix_btree_lower_bound(const ZixBTree* const t, bool equal = false; const uint16_t i = zix_btree_node_find(t, n, e, &equal); - // Update iterator stack - (*ti)->stack[(*ti)->level].node = n; - (*ti)->stack[(*ti)->level].index = i; + zix_btree_iter_set_frame(*ti, n, i); if (equal) { found_level = (*ti)->level; diff --git a/src/zix/btree.h b/src/zix/btree.h index 0cd549a..3acd3a4 100644 --- a/src/zix/btree.h +++ b/src/zix/btree.h @@ -78,10 +78,16 @@ ZIX_API ZixStatus zix_btree_insert(ZixBTree* t, void* e); /** - Remove the value `e` from `t`, set `removed` to the removed pointer. + Remove the value `e` from `t`. + + @param out Set to point to the removed pointer (which may not equal `e`). + + @param next If non-NULL, pointed to the value following `e`. If *next is + also non-NULL, the iterator is reused, otherwise a new one is allocated. To + reuse an iterator, no items may have been added since its creation. */ ZIX_API ZixStatus -zix_btree_remove(ZixBTree* t, const void* e, void** removed); +zix_btree_remove(ZixBTree* t, const void* e, void** out, ZixBTreeIter** next); /** Set `ti` to an element equal to `e` in `t`. diff --git a/wscript b/wscript index b31b2e5..e605632 100644 --- a/wscript +++ b/wscript @@ -10,7 +10,7 @@ import waflib.extras.autowaf as autowaf # major increment <=> incompatible changes # minor increment <=> compatible changes (additions) # micro increment <=> no interface changes -SORD_VERSION = '0.12.3' +SORD_VERSION = '0.13.0' SORD_MAJOR_VERSION = '0' # Mandatory waf variables -- cgit v1.2.1