From e9b773bf452f0df0ea69e363bcb5899fea124b28 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sun, 11 Apr 2021 22:07:39 -0400 Subject: Add model --- src/model.c | 728 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 728 insertions(+) create mode 100644 src/model.c (limited to 'src/model.c') diff --git a/src/model.c b/src/model.c new file mode 100644 index 00000000..0485b509 --- /dev/null +++ b/src/model.c @@ -0,0 +1,728 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "model.h" + +#include "caret.h" +#include "compare.h" +#include "cursor.h" +#include "statement.h" + +#include "zix/btree.h" +#include "zix/common.h" + +#include +#include +#include +#include + +static const SerdQuad everything_pattern = {0, 0, 0, 0}; + +/// A 3-bit signature for a triple pattern used as a table index +typedef enum { + SERD_SIGNATURE_XXX, // 000 (???) + SERD_SIGNATURE_XXO, // 001 (??O) + SERD_SIGNATURE_XPX, // 010 (?P?) + SERD_SIGNATURE_XPO, // 011 (?PO) + SERD_SIGNATURE_SXX, // 100 (S??) + SERD_SIGNATURE_SXO, // 101 (S?O) + SERD_SIGNATURE_SPX, // 110 (SP?) + SERD_SIGNATURE_SPO, // 111 (SPO) +} SerdPatternSignature; + +static ZixComparator +serd_model_index_comparator(const SerdModel* const model, + const SerdStatementOrder order) +{ + return (order < SERD_ORDER_GSPO && !(model->flags & SERD_STORE_GRAPHS)) + ? serd_triple_compare + : serd_quad_compare; +} + +static ZixComparator +serd_model_pattern_comparator(const SerdModel* const model, + const SerdStatementOrder order) +{ + return (order < SERD_ORDER_GSPO && !(model->flags & SERD_STORE_GRAPHS)) + ? serd_triple_compare_pattern + : serd_quad_compare_pattern; +} + +SerdStatus +serd_model_add_index(SerdModel* const model, const SerdStatementOrder order) +{ + if (model->indices[order]) { + return SERD_FAILURE; + } + + const unsigned* const ordering = orderings[order]; + const ZixComparator comparator = serd_model_index_comparator(model, order); + + model->indices[order] = zix_btree_new(comparator, ordering); + + ZixStatus zst = model->indices[order] ? ZIX_STATUS_SUCCESS : ZIX_STATUS_ERROR; + + // Insert statements from the default index + if (order != model->default_order) { + ZixBTree* const default_index = model->indices[model->default_order]; + for (ZixBTreeIter i = zix_btree_begin(default_index); + !zst && !zix_btree_iter_is_end(i); + zix_btree_iter_increment(&i)) { + zst = zix_btree_insert(model->indices[order], zix_btree_get(i)); + } + } + + return zst ? serd_model_drop_index(model, order) : SERD_SUCCESS; +} + +SerdStatus +serd_model_drop_index(SerdModel* const model, const SerdStatementOrder order) +{ + if (!model->indices[order]) { + return SERD_FAILURE; + } + + if (order == model->default_order) { + return SERD_ERR_BAD_CALL; + } + + zix_btree_free(model->indices[order], NULL); + model->indices[order] = NULL; + return SERD_SUCCESS; +} + +SerdModel* +serd_model_new(SerdWorld* const world, + const SerdStatementOrder default_order, + const SerdModelFlags flags) +{ + SerdModel* model = (SerdModel*)calloc(1, sizeof(struct SerdModelImpl)); + + model->world = world; + model->nodes = serd_nodes_new(); + model->default_order = default_order; + model->flags = flags; + + serd_model_add_index(model, default_order); + + const ScanStrategy end_strategy = {SCAN_EVERYTHING, 0u, default_order}; + + model->end = serd_cursor_make(model, + zix_btree_end(model->indices[default_order]), + everything_pattern, + end_strategy); + + return model; +} + +SerdModel* +serd_model_copy(const SerdModel* const model) +{ + SerdModel* copy = + serd_model_new(model->world, model->default_order, model->flags); + + SerdCursor* cursor = serd_model_begin(model); + serd_model_insert_statements(copy, cursor); + serd_cursor_free(cursor); + + assert(serd_model_size(model) == serd_model_size(copy)); + assert(serd_nodes_size(model->nodes) == serd_nodes_size(copy->nodes)); + return copy; +} + +static void +log_bad_index(const SerdModel* const model, + const char* const description, + const SerdStatementOrder index_order, + const bool s, + const bool p, + const bool o, + const bool g) +{ + static const char* const order_strings[] = { + "S P O G", + "S O P G", + "O P S G", + "O S P G", + "P S O G", + "P O S G", + "G S P O", + "G S O P", + "G O P S", + "G O S P", + "G P S O", + "G P O S", + }; + + serd_logf(model->world, + SERD_LOG_LEVEL_WARNING, + "%s index (%s) for (%s %s %s%s) query", + description, + order_strings[index_order], + s ? "S" : "?", + p ? "P" : "?", + o ? "O" : "?", + (model->flags & SERD_STORE_GRAPHS) ? g ? " G" : " ?" : ""); +} + +static SerdCursor +make_begin_cursor(const SerdModel* const model, const SerdStatementOrder order) +{ + if (!model->indices[order]) { + log_bad_index(model, "missing", order, false, false, false, false); + return model->end; + } + + const ScanStrategy strategy = {SCAN_EVERYTHING, 0u, order}; + + return serd_cursor_make(model, + zix_btree_begin(model->indices[order]), + everything_pattern, + strategy); +} + +bool +serd_model_equals(const SerdModel* const a, const SerdModel* const b) +{ + if (!a && !b) { + return true; + } + + if (!a || !b || serd_model_size(a) != serd_model_size(b)) { + return false; + } + + SerdCursor ia = make_begin_cursor(a, a->default_order); + SerdCursor ib = make_begin_cursor(b, b->default_order); + + while (!serd_cursor_is_end(&ia) && !serd_cursor_is_end(&ib)) { + if (!serd_statement_equals(serd_cursor_get(&ia), serd_cursor_get(&ib)) || + serd_cursor_advance(&ia) > SERD_FAILURE || + serd_cursor_advance(&ib) > SERD_FAILURE) { + return false; + } + } + + return serd_cursor_is_end(&ia) && serd_cursor_is_end(&ib); +} + +static void +serd_model_drop_statement(SerdModel* const model, + SerdStatement* const statement) +{ + assert(statement); + + for (unsigned i = 0u; i < 4; ++i) { + if (statement->nodes[i]) { + serd_nodes_deref(model->nodes, statement->nodes[i]); + } + } + + if (statement->caret && statement->caret->file) { + serd_nodes_deref(model->nodes, statement->caret->file); + } + + serd_statement_free(statement); +} + +void +serd_model_free(SerdModel* const model) +{ + if (!model) { + return; + } + + // Free all statements (which are owned by the default index) + ZixBTree* const default_index = model->indices[model->default_order]; + zix_btree_clear(default_index, (ZixDestroyFunc)serd_statement_free); + + // Free indices themselves + for (unsigned i = 0u; i < N_STATEMENT_ORDERS; ++i) { + zix_btree_free(model->indices[i], NULL); + } + + serd_nodes_free(model->nodes); + free(model); +} + +SerdWorld* +serd_model_world(SerdModel* const model) +{ + return model->world; +} + +const SerdNodes* +serd_model_nodes(const SerdModel* const model) +{ + return model->nodes; +} + +SerdStatementOrder +serd_model_default_order(const SerdModel* const model) +{ + return model->default_order; +} + +SerdModelFlags +serd_model_flags(const SerdModel* const model) +{ + return model->flags; +} + +size_t +serd_model_size(const SerdModel* const model) +{ + return zix_btree_size(model->indices[model->default_order]); +} + +bool +serd_model_empty(const SerdModel* const model) +{ + return serd_model_size(model) == 0; +} + +SerdCursor* +serd_model_begin_ordered(const SerdModel* const model, + const SerdStatementOrder order) +{ + const SerdCursor cursor = make_begin_cursor(model, order); + + return serd_cursor_copy(&cursor); +} + +SerdCursor* +serd_model_begin(const SerdModel* const model) +{ + return serd_model_begin_ordered(model, model->default_order); +} + +const SerdCursor* +serd_model_end(const SerdModel* const model) +{ + return &model->end; +} + +static SerdStatementOrder +simple_order(const SerdStatementOrder order) +{ + return order < SERD_ORDER_GSPO ? order : (SerdStatementOrder)(order - 6u); +} + +/// Return the best index scanning strategy for a pattern with given nodes +static ScanStrategy +serd_model_strategy(const SerdModel* const model, + const bool with_s, + const bool with_p, + const bool with_o, + const bool with_g) +{ +#define N_STRATEGY_TIERS 4u + + const SerdStatementOrder default_order = simple_order(model->default_order); + const ScanStrategy none = {SCAN_EVERYTHING, 0u, default_order}; + + const ScanStrategy strategies[N_STRATEGY_TIERS][8] = { + // Preferred perfect strategies: SPO, SOP, OPS, PSO + { + none, + {SCAN_RANGE, 1u, SERD_ORDER_OPS}, // ??O + {SCAN_RANGE, 1u, SERD_ORDER_PSO}, // ?P? + {SCAN_RANGE, 2u, SERD_ORDER_OPS}, // ?PO + {SCAN_RANGE, 1u, SERD_ORDER_SPO}, // S?? + {SCAN_RANGE, 2u, SERD_ORDER_SOP}, // S?O + {SCAN_RANGE, 2u, SERD_ORDER_SPO}, // SP? + {SCAN_RANGE, 3u, default_order} // SPO + }, + + // Alternate perfect strategies: adds OSP and POS + { + none, // ??? + {SCAN_RANGE, 1u, SERD_ORDER_OSP}, // ??O + {SCAN_RANGE, 1u, SERD_ORDER_POS}, // ?P? + {SCAN_RANGE, 2u, SERD_ORDER_POS}, // ?PO + {SCAN_RANGE, 1u, SERD_ORDER_SOP}, // S?? + {SCAN_RANGE, 2u, SERD_ORDER_OSP}, // S?O + {SCAN_RANGE, 2u, SERD_ORDER_PSO}, // SP? + none // SPO + }, + + // Preferred partial prefix strategies + { + none, // ??? + none, // ??O + none, // ?P? + {FILTER_RANGE, 1u, SERD_ORDER_PSO}, // ?PO + none, // S?? + {FILTER_RANGE, 1u, SERD_ORDER_SPO}, // S?O + {FILTER_RANGE, 1u, SERD_ORDER_SOP}, // SP? + none // SPO + }, + + // Alternate partial prefix strategies + { + none, // ??? + none, // ??O + none, // ?P? + {FILTER_RANGE, 1u, SERD_ORDER_OSP}, // ?PO + none, // S?? + {FILTER_RANGE, 1u, SERD_ORDER_OPS}, // S?O + {FILTER_RANGE, 1u, SERD_ORDER_POS}, // SP? + none // SPO + }, + }; + + // Construct a 3-bit signature for this triple pattern + const SerdPatternSignature sig = + (SerdPatternSignature)(((with_s ? 1u : 0u) << 2u) + // + ((with_p ? 1u : 0u) << 1u) + // + ((with_o ? 1u : 0u))); + + if (!sig && !with_g) { + return none; + } + + // Try to find a strategy we can support, from most to least preferred + for (unsigned t = 0u; t < N_STRATEGY_TIERS; ++t) { + ScanStrategy strategy = strategies[t][sig]; + const SerdStatementOrder triple_order = strategy.order; + + assert(strategy.order < SERD_ORDER_GSPO); + + if (strategy.n_prefix > 0) { + if (with_g) { + const SerdStatementOrder quad_order = + (SerdStatementOrder)(triple_order + 6u); + + if (model->indices[quad_order]) { + strategy.order = quad_order; + ++strategy.n_prefix; + return strategy; + } + } + + if (model->indices[triple_order]) { + return strategy; + } + } + } + + // Indices don't help with the triple, try to at least stay in the graph + if (with_g) { + for (unsigned i = SERD_ORDER_GSPO; i <= SERD_ORDER_GPOS; ++i) { + if (model->indices[i]) { + const ScanMode mode = sig == 0x000 ? SCAN_RANGE : FILTER_RANGE; + const ScanStrategy strategy = {mode, 1u, (SerdStatementOrder)i}; + + return strategy; + } + } + } + + // All is lost, regress to linear search + const ScanStrategy linear = {FILTER_EVERYTHING, 0u, model->default_order}; + return linear; +} + +static SerdCursor +serd_model_search(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + // Build a pattern of interned nodes + const SerdQuad pattern = {serd_nodes_get(model->nodes, s), + serd_nodes_get(model->nodes, p), + serd_nodes_get(model->nodes, o), + serd_nodes_get(model->nodes, g)}; + + // If some node isn't in the model at all, no need to search for statements + const int n_given = !!s + !!p + !!o + !!g; + if (n_given != (!!pattern[0] + !!pattern[1] + !!pattern[2] + !!pattern[3])) { + return model->end; + } + + // Determine the best available search strategy + const ScanStrategy strategy = serd_model_strategy(model, s, p, o, g); + const SerdStatementOrder order = strategy.order; + ZixBTree* const index = model->indices[order]; + + // Issue any performance warnings, and return early for degenerate cases + switch (strategy.mode) { + case SCAN_EVERYTHING: + return make_begin_cursor(model, order); + case SCAN_RANGE: + break; + case FILTER_EVERYTHING: + log_bad_index(model, "using effectively linear", order, s, p, o, g); + return serd_cursor_make(model, zix_btree_begin(index), pattern, strategy); + case FILTER_RANGE: + log_bad_index(model, "filtering partial", order, s, p, o, g); + break; + } + + // Find the first statement matching the pattern in the index + ZixBTreeIter iter = zix_btree_end_iter; + zix_btree_lower_bound(index, + serd_model_pattern_comparator(model, order), + orderings[order], + pattern, + &iter); + + return (!zix_btree_iter_is_end(iter) && + serd_iter_in_range(iter, pattern, strategy)) + ? serd_cursor_make(model, iter, pattern, strategy) + : model->end; +} + +SerdCursor* +serd_model_find(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + const SerdCursor cursor = serd_model_search(model, s, p, o, g); + + return zix_btree_iter_is_end(cursor.iter) ? NULL : serd_cursor_copy(&cursor); +} + +const SerdNode* +serd_model_get(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + const SerdStatement* const statement = + serd_model_get_statement(model, s, p, o, g); + + return !statement ? NULL + : !s ? serd_statement_subject(statement) + : !p ? serd_statement_predicate(statement) + : !o ? serd_statement_object(statement) + : !g ? serd_statement_graph(statement) + : NULL; +} + +const SerdStatement* +serd_model_get_statement(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + if ((bool)s + (bool)p + (bool)o != 2 && + (bool)s + (bool)p + (bool)o + (bool)g != 3) { + return NULL; + } + + const SerdCursor i = serd_model_search(model, s, p, o, g); + + return serd_cursor_get(&i); +} + +size_t +serd_model_count(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + SerdCursor i = serd_model_search(model, s, p, o, g); + size_t count = 0; + + for (; !serd_cursor_is_end(&i); serd_cursor_advance(&i)) { + ++count; + } + + return count; +} + +bool +serd_model_ask(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + const SerdCursor c = serd_model_search(model, s, p, o, g); + + return !serd_cursor_is_end(&c); +} + +static SerdCaret* +serd_model_intern_caret(SerdModel* const model, const SerdCaret* const caret) +{ + if (caret) { + SerdCaret* copy = (SerdCaret*)calloc(1, sizeof(SerdCaret)); + + copy->file = serd_nodes_intern(model->nodes, caret->file); + copy->line = caret->line; + copy->col = caret->col; + return copy; + } + + return NULL; +} + +SerdStatus +serd_model_add_with_caret(SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g, + const SerdCaret* const caret) +{ + SerdStatement* const statement = serd_statement_new(s, p, o, g, NULL); + if (!statement) { + return SERD_ERR_UNKNOWN; + } + + statement->caret = serd_model_intern_caret(model, caret); + + bool added = false; + for (unsigned i = 0u; i < N_STATEMENT_ORDERS; ++i) { + if (model->indices[i]) { + if (!zix_btree_insert(model->indices[i], statement)) { + added = true; + } else if ((SerdStatementOrder)i == model->default_order) { + break; // Statement already indexed + } + } + } + + ++model->version; + if (added) { + return SERD_SUCCESS; + } + + serd_model_drop_statement(model, statement); + return SERD_FAILURE; +} + +SerdStatus +serd_model_add(SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + return serd_model_add_with_caret(model, + serd_nodes_intern(model->nodes, s), + serd_nodes_intern(model->nodes, p), + serd_nodes_intern(model->nodes, o), + serd_nodes_intern(model->nodes, g), + NULL); +} + +SerdStatus +serd_model_insert(SerdModel* const model, const SerdStatement* const statement) +{ + SerdNodes* const nodes = model->nodes; + + return serd_model_add_with_caret( + model, + serd_nodes_intern(nodes, serd_statement_subject(statement)), + serd_nodes_intern(nodes, serd_statement_predicate(statement)), + serd_nodes_intern(nodes, serd_statement_object(statement)), + serd_nodes_intern(nodes, serd_statement_graph(statement)), + serd_statement_caret(statement)); +} + +SerdStatus +serd_model_insert_statements(SerdModel* const model, SerdCursor* const range) +{ + const SerdStatement* statement = serd_cursor_get(range); + SerdStatus st = SERD_SUCCESS; + + while (!st && statement) { + if (!(st = serd_model_insert(model, statement))) { + st = serd_cursor_advance(range); + } + + statement = serd_cursor_get(range); + } + + return st; +} + +SerdStatus +serd_model_erase(SerdModel* const model, SerdCursor* const cursor) +{ + const SerdStatement* statement = serd_cursor_get(cursor); + SerdStatement* removed = NULL; + ZixStatus zst = ZIX_STATUS_SUCCESS; + + if (!statement) { + return SERD_FAILURE; + } + + // Erase from the index associated with this cursor + zst = zix_btree_remove(model->indices[cursor->strategy.order], + statement, + (void**)&removed, + &cursor->iter); + + // No error should be possible with a valid cursor + assert(!zst); + assert(removed); + assert(removed == statement); + + for (unsigned i = SERD_ORDER_SPO; i <= SERD_ORDER_GPOS; ++i) { + ZixBTree* index = model->indices[i]; + + if (index && i != (unsigned)cursor->strategy.order) { + SerdStatement* ignored = NULL; + ZixBTreeIter next = zix_btree_end_iter; + zst = zix_btree_remove(index, statement, (void**)&ignored, &next); + + assert(zst == ZIX_STATUS_SUCCESS || zst == ZIX_STATUS_NOT_FOUND); + assert(!ignored || ignored == removed); + } + } + serd_cursor_scan_next(cursor); + + serd_model_drop_statement(model, removed); + cursor->version = ++model->version; + + (void)zst; + return SERD_SUCCESS; +} + +SerdStatus +serd_model_erase_statements(SerdModel* const model, SerdCursor* const range) +{ + SerdStatus st = SERD_SUCCESS; + + while (!st && !serd_cursor_is_end(range)) { + st = serd_model_erase(model, range); + } + + return st; +} + +SerdStatus +serd_model_clear(SerdModel* const model) +{ + SerdCursor i = make_begin_cursor(model, model->default_order); + + while (!serd_cursor_is_end(&i)) { + serd_model_erase(model, &i); + } + + return SERD_SUCCESS; +} -- cgit v1.2.1