diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/.clang-tidy | 1 | ||||
-rw-r--r-- | src/compare.c | 158 | ||||
-rw-r--r-- | src/compare.h | 54 | ||||
-rw-r--r-- | src/cursor.c | 224 | ||||
-rw-r--r-- | src/cursor.h | 83 | ||||
-rw-r--r-- | src/describe.c | 266 | ||||
-rw-r--r-- | src/inserter.c | 122 | ||||
-rw-r--r-- | src/model.c | 728 | ||||
-rw-r--r-- | src/model.h | 37 | ||||
-rw-r--r-- | src/reader.c | 2 | ||||
-rw-r--r-- | src/serdi.c | 59 | ||||
-rw-r--r-- | src/statement.c | 8 | ||||
-rw-r--r-- | src/string.c | 4 |
13 files changed, 1741 insertions, 5 deletions
diff --git a/src/.clang-tidy b/src/.clang-tidy index a9e25a2f..6029eeaa 100644 --- a/src/.clang-tidy +++ b/src/.clang-tidy @@ -17,6 +17,7 @@ Checks: > -llvmlibc-*, -misc-no-recursion, -readability-function-cognitive-complexity, + -readability-suspicious-call-argument, WarningsAsErrors: '*' HeaderFilterRegex: '.*' FormatStyle: file diff --git a/src/compare.c b/src/compare.c new file mode 100644 index 00000000..82ff998f --- /dev/null +++ b/src/compare.c @@ -0,0 +1,158 @@ +/* + Copyright 2011-2021 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "compare.h" + +#include "statement.h" + +#include "serd/serd.h" + +#include <assert.h> + +/// Compare a mandatory node with a node pattern +static inline int +serd_node_wildcard_compare(const SerdNode* SERD_NONNULL a, + const SerdNode* SERD_NULLABLE b) +{ + assert(a); + return b ? serd_node_compare(a, b) : 0; +} + +/// Compare an optional graph node with a node pattern +static inline int +serd_node_graph_compare(const SerdNode* SERD_NULLABLE a, + const SerdNode* SERD_NULLABLE b) +{ + if (a == b) { + return 0; + } + + if (!a) { + return -1; + } + + if (!b) { + return 1; + } + + return serd_node_compare(a, b); +} + +/// Compare statements lexicographically, ignoring graph +int +serd_triple_compare(const void* const x, + const void* const y, + const void* const user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (unsigned i = 0u; i < 3u; ++i) { + const int o = ordering[i]; + assert(o < 3); + + const int c = serd_node_compare(s->nodes[o], t->nodes[o]); + if (c) { + return c; + } + } + + return 0; +} + +/** + Compare statments with statement patterns lexicographically, ignoring graph. + + Null nodes in the second argument are treated as wildcards, always less than + any node. +*/ +int +serd_triple_compare_pattern(const void* const x, + const void* const y, + const void* const user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (unsigned i = 0u; i < 3u; ++i) { + const int o = ordering[i]; + assert(o < 3); + + const int c = serd_node_wildcard_compare(s->nodes[o], t->nodes[o]); + if (c) { + return c; + } + } + + return 0; +} + +/// Compare statements lexicographically +int +serd_quad_compare(const void* const x, + const void* const y, + const void* const user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (unsigned i = 0u; i < 4u; ++i) { + const int o = ordering[i]; + const int c = + (o == SERD_GRAPH) + ? serd_node_graph_compare(s->nodes[SERD_GRAPH], t->nodes[SERD_GRAPH]) + : serd_node_compare(s->nodes[o], t->nodes[o]); + + if (c) { + return c; + } + } + + return 0; +} + +/** + Compare statments with statement patterns lexicographically. + + Null nodes in the second argument are treated as wildcards, always less than + any node. +*/ +int +serd_quad_compare_pattern(const void* const x, + const void* const y, + const void* const user_data) +{ + const int* const ordering = (const int*)user_data; + const SerdStatement* const s = (const SerdStatement*)x; + const SerdStatement* const t = (const SerdStatement*)y; + + for (unsigned i = 0u; i < 4u; ++i) { + const int o = ordering[i]; + const int c = + (o == SERD_GRAPH) + ? serd_node_graph_compare(s->nodes[SERD_GRAPH], t->nodes[SERD_GRAPH]) + : serd_node_wildcard_compare(s->nodes[o], t->nodes[o]); + + if (c) { + return c; + } + } + + return 0; +} diff --git a/src/compare.h b/src/compare.h new file mode 100644 index 00000000..eabbf43e --- /dev/null +++ b/src/compare.h @@ -0,0 +1,54 @@ +/* + Copyright 2011-2021 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_COMPARE_H +#define SERD_COMPARE_H + +#include "serd/serd.h" + +/// Compare statements lexicographically, ignoring graph +SERD_PURE_FUNC +int +serd_triple_compare(const void* x, const void* y, const void* user_data); + +/** + Compare statments with statement patterns lexicographically, ignoring graph. + + Null nodes in the second argument are treated as wildcards, always less than + any node. +*/ +SERD_PURE_FUNC +int +serd_triple_compare_pattern(const void* x, + const void* y, + const void* user_data); + +/// Compare statements lexicographically +SERD_PURE_FUNC +int +serd_quad_compare(const void* x, const void* y, const void* user_data); + +/** + Compare statments with statement patterns lexicographically. + + Null nodes in the second argument are treated as wildcards, always less than + any node. +*/ +SERD_PURE_FUNC +int +serd_quad_compare_pattern(const void* x, const void* y, const void* user_data); + +#endif // SERD_COMPARE_H diff --git a/src/cursor.c b/src/cursor.c new file mode 100644 index 00000000..d57da866 --- /dev/null +++ b/src/cursor.c @@ -0,0 +1,224 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "cursor.h" + +#include "model.h" +#include "node.h" + +#include "serd/serd.h" +#include "zix/btree.h" +#include "zix/common.h" + +#include <assert.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +static inline bool +statement_matches_quad(const SerdStatement* const statement, + const SerdQuad quad) +{ + return serd_statement_matches(statement, quad[0], quad[1], quad[2], quad[3]); +} + +SERD_PURE_FUNC +bool +serd_iter_in_range(const ZixBTreeIter iter, + const SerdQuad pattern, + const ScanStrategy strategy) +{ + const SerdStatement* const statement = + (const SerdStatement*)zix_btree_get(iter); + + for (unsigned i = 0u; i < strategy.n_prefix; ++i) { + const unsigned field = orderings[strategy.order][i]; + if (!serd_node_pattern_match(statement->nodes[field], pattern[field])) { + return false; + } + } + + return true; +} + +static bool +serd_cursor_in_range(const SerdCursor* const cursor) +{ + return (cursor->strategy.mode == FILTER_EVERYTHING || + serd_iter_in_range(cursor->iter, cursor->pattern, cursor->strategy)); +} + +/// Seek forward as necessary until the cursor points to a matching statement +static SerdStatus +serd_cursor_seek_match(SerdCursor* const cursor) +{ + assert(cursor->strategy.mode == FILTER_EVERYTHING || + cursor->strategy.mode == FILTER_RANGE); + + for (; !zix_btree_iter_is_end(cursor->iter); + zix_btree_iter_increment(&cursor->iter)) { + if (!serd_cursor_in_range(cursor)) { + // Went past the end of the matching range, reset to end + cursor->iter = zix_btree_end_iter; + return SERD_FAILURE; + } + + const SerdStatement* s = (const SerdStatement*)zix_btree_get(cursor->iter); + if (statement_matches_quad(s, cursor->pattern)) { + break; // Found matching statement + } + } + + return SERD_SUCCESS; +} + +static bool +check_version(const SerdCursor* const cursor) +{ + if (cursor->version != cursor->model->version) { + serd_logf(cursor->model->world, + SERD_LOG_LEVEL_ERROR, + "attempt to use invalidated cursor"); + return false; + } + + return true; +} + +SerdCursor +serd_cursor_make(const SerdModel* const model, + const ZixBTreeIter iter, + const SerdQuad pattern, + const ScanStrategy strategy) +{ + SerdCursor cursor = {model, + {pattern[0], pattern[1], pattern[2], pattern[3]}, + model->version, + iter, + strategy}; + + if (strategy.mode == FILTER_RANGE || strategy.mode == FILTER_EVERYTHING) { + serd_cursor_seek_match(&cursor); + } + +#ifndef NDEBUG + if (!zix_btree_iter_is_end(cursor.iter)) { + // Check that the cursor is at a matching statement + const SerdStatement* first = + (const SerdStatement*)zix_btree_get(cursor.iter); + assert(statement_matches_quad(first, pattern)); + + // Check that any nodes in the pattern are interned + for (unsigned i = 0u; i < 3; ++i) { + assert(!cursor.pattern[i] || cursor.pattern[i] == first->nodes[i]); + } + } +#endif + + return cursor; +} + +SerdCursor* +serd_cursor_copy(const SerdCursor* const cursor) +{ + if (!cursor) { + return NULL; + } + + SerdCursor* const copy = (SerdCursor* const)malloc(sizeof(SerdCursor)); + memcpy(copy, cursor, sizeof(SerdCursor)); + return copy; +} + +const SerdStatement* +serd_cursor_get(const SerdCursor* const cursor) +{ + return ((!zix_btree_iter_is_end(cursor->iter) && check_version(cursor)) + ? (const SerdStatement*)zix_btree_get(cursor->iter) + : NULL); +} + +SerdStatus +serd_cursor_scan_next(SerdCursor* const cursor) +{ + if (zix_btree_iter_is_end(cursor->iter)) { + return SERD_FAILURE; + } + + switch (cursor->strategy.mode) { + case SCAN_EVERYTHING: + break; + + case SCAN_RANGE: + if (!serd_cursor_in_range(cursor)) { + // Went past the end of the matching range + cursor->iter = zix_btree_end_iter; + return SERD_FAILURE; + } + break; + + case FILTER_EVERYTHING: + case FILTER_RANGE: + // Seek forward to next match + return serd_cursor_seek_match(cursor); + } + + return SERD_SUCCESS; +} + +SerdStatus +serd_cursor_advance(SerdCursor* const cursor) +{ + if (zix_btree_iter_is_end(cursor->iter) || !check_version(cursor)) { + return SERD_ERR_BAD_CURSOR; + } + + const ZixStatus zst = zix_btree_iter_increment(&cursor->iter); + if (zst) { + assert(zst == ZIX_STATUS_REACHED_END); + return SERD_FAILURE; + } + + return serd_cursor_scan_next(cursor); +} + +bool +serd_cursor_is_end(const SerdCursor* const cursor) +{ + return !cursor || zix_btree_iter_is_end(cursor->iter); +} + +bool +serd_cursor_equals(const SerdCursor* const lhs, const SerdCursor* const rhs) +{ + if (serd_cursor_is_end(lhs) || serd_cursor_is_end(rhs)) { + return serd_cursor_is_end(lhs) && serd_cursor_is_end(rhs); + } + + /* We don't bother checking if the patterns match each other here, or if both + cursors have the same ordering, since both of these must be true if the + BTree iterators are equal. */ + + return (zix_btree_iter_equals(lhs->iter, rhs->iter) && + lhs->strategy.mode == rhs->strategy.mode && + lhs->strategy.n_prefix == rhs->strategy.n_prefix); +} + +void +serd_cursor_free(SerdCursor* const cursor) +{ + free(cursor); +} diff --git a/src/cursor.h b/src/cursor.h new file mode 100644 index 00000000..0a786866 --- /dev/null +++ b/src/cursor.h @@ -0,0 +1,83 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_CURSOR_H +#define SERD_CURSOR_H + +#include "statement.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include <stdbool.h> +#include <stddef.h> + +#define N_STATEMENT_ORDERS 12u + +/// An iteration mode that determines what to skip and when to end +typedef enum { + SCAN_EVERYTHING, ///< Iterate over entire store + SCAN_RANGE, ///< Iterate over range with equal prefix + FILTER_EVERYTHING, ///< Iterate to end of store, filtering + FILTER_RANGE, ///< Iterate over range with equal prefix, filtering +} ScanMode; + +/// A strategy for searching and iterating over a statement index +typedef struct { + ScanMode mode; ///< Iteration mode + unsigned n_prefix; ///< Number of prefix nodes that match the index + SerdStatementOrder order; ///< Order of index to scan +} ScanStrategy; + +struct SerdCursorImpl { + const SerdModel* model; ///< Model being iterated over + const SerdNode* pattern[4]; ///< Search pattern (nodes in model or null) + size_t version; ///< Model version when iterator was created + ZixBTreeIter iter; ///< Current position in index + ScanStrategy strategy; ///< Index scanning strategy +}; + +/// Lookup table of ordered indices for each SerdStatementOrder +static const unsigned orderings[N_STATEMENT_ORDERS][4] = { + {0u, 1u, 2u, 3u}, // SPOG + {0u, 2u, 1u, 3u}, // SOPG + {2u, 1u, 0u, 3u}, // OPSG + {2u, 0u, 1u, 3u}, // OPSG + {1u, 0u, 2u, 3u}, // PSOG + {1u, 2u, 0u, 3u}, // PSOG + {3u, 0u, 1u, 2u}, // GSPO + {3u, 0u, 2u, 1u}, // GSPO + {3u, 2u, 1u, 0u}, // GOPS + {3u, 2u, 0u, 1u}, // GOPS + {3u, 1u, 0u, 2u}, // GPSO + {3u, 1u, 2u, 0u} // GPSO +}; + +SerdCursor +serd_cursor_make(const SerdModel* model, + ZixBTreeIter iter, + const SerdQuad pattern, + ScanStrategy strategy); + +SerdStatus +serd_cursor_scan_next(SerdCursor* cursor); + +bool +serd_iter_in_range(ZixBTreeIter iter, + const SerdQuad pattern, + ScanStrategy strategy); + +#endif // SERD_CURSOR_H diff --git a/src/describe.c b/src/describe.c new file mode 100644 index 00000000..5ddbf7d0 --- /dev/null +++ b/src/describe.c @@ -0,0 +1,266 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "cursor.h" +#include "model.h" +#include "world.h" + +// Define the types used in the hash interface for more type safety +#define ZIX_HASH_KEY_TYPE const SerdNode +#define ZIX_HASH_RECORD_TYPE const SerdNode + +#include "serd/serd.h" +#include "zix/common.h" +#include "zix/digest.h" +#include "zix/hash.h" + +#include <assert.h> +#include <stdbool.h> +#include <stdlib.h> + +typedef enum { NAMED, ANON_S, ANON_O, LIST_S, LIST_O } NodeStyle; + +static SerdStatus +write_range_statement(const SerdSink* sink, + const SerdModel* model, + ZixHash* list_subjects, + unsigned depth, + SerdStatementFlags flags, + const SerdStatement* statement); + +static NodeStyle +get_node_style(const SerdModel* const model, const SerdNode* const node) +{ + if (serd_node_type(node) != SERD_BLANK) { + return NAMED; // Non-blank node can't be anonymous + } + + const size_t n_as_object = serd_model_count(model, NULL, NULL, node, NULL); + if (n_as_object > 1) { + return NAMED; // Blank node referred to several times + } + + if (serd_model_count(model, node, model->world->rdf_first, NULL, NULL) == 1 && + serd_model_count(model, node, model->world->rdf_rest, NULL, NULL) == 1 && + !serd_model_ask(model, NULL, model->world->rdf_rest, node, NULL)) { + return n_as_object == 0 ? LIST_S : LIST_O; + } + + return n_as_object == 0 ? ANON_S : ANON_O; +} + +static const SerdNode* +identity(const SerdNode* const record) +{ + return record; +} + +static ZixHashCode +ptr_hash(const SerdNode* const ptr) +{ + return zix_digest(0u, &ptr, sizeof(SerdNode*)); +} + +static bool +ptr_equals(const SerdNode* const a, const SerdNode* const b) +{ + return *(const void* const*)a == *(const void* const*)b; +} + +static SerdStatus +write_pretty_range(const SerdSink* const sink, + const unsigned depth, + const SerdModel* const model, + SerdCursor* const range) +{ + ZixHash* const list_subjects = zix_hash_new(identity, ptr_hash, ptr_equals); + SerdStatus st = SERD_SUCCESS; + + while (!st && !serd_cursor_is_end(range)) { + const SerdStatement* const statement = serd_cursor_get(range); + assert(statement); + + if (!(st = write_range_statement( + sink, model, list_subjects, depth, 0, statement))) { + st = serd_cursor_advance(range); + } + } + + zix_hash_free(list_subjects); + + return st > SERD_FAILURE ? st : SERD_SUCCESS; +} + +static SerdStatus +write_list(const SerdSink* const sink, + const SerdModel* const model, + ZixHash* const list_subjects, + const unsigned depth, + SerdStatementFlags flags, + const SerdNode* object, + const SerdNode* const graph) +{ + const SerdWorld* const world = model->world; + const SerdNode* const rdf_first = world->rdf_first; + const SerdNode* const rdf_rest = world->rdf_rest; + const SerdNode* const rdf_nil = world->rdf_nil; + SerdStatus st = SERD_SUCCESS; + + const SerdStatement* fs = + serd_model_get_statement(model, object, rdf_first, NULL, graph); + + assert(fs); // Shouldn't get here if it doesn't at least have an rdf:first + + while (!st && !serd_node_equals(object, rdf_nil)) { + // Write rdf:first statement for this node + if ((st = write_range_statement( + sink, model, list_subjects, depth, flags, fs))) { + return st; + } + + // Get rdf:rest statement + const SerdStatement* const rs = + serd_model_get_statement(model, object, rdf_rest, NULL, graph); + + if (!rs) { + // Terminate malformed list with missing rdf:rest + return serd_sink_write(sink, 0, object, rdf_rest, rdf_nil, graph); + } + + // Terminate if the next node has no rdf:first + const SerdNode* const next = serd_statement_object(rs); + if (!(fs = serd_model_get_statement(model, next, rdf_first, NULL, graph))) { + return serd_sink_write(sink, 0, object, rdf_rest, rdf_nil, graph); + } + + // Write rdf:next statement and move to the next node + st = serd_sink_write_statement(sink, 0, rs); + object = next; + flags = 0u; + } + + return st; +} + +static bool +skip_range_statement(const SerdModel* const model, + const SerdStatement* const statement) +{ + const SerdNode* const subject = serd_statement_subject(statement); + const NodeStyle subject_style = get_node_style(model, subject); + const SerdNode* const predicate = serd_statement_predicate(statement); + + if (subject_style == ANON_O || subject_style == LIST_O) { + return true; // Skip subject that will be inlined elsewhere + } + + if (subject_style == LIST_S && + (serd_node_equals(predicate, model->world->rdf_first) || + serd_node_equals(predicate, model->world->rdf_rest))) { + return true; // Skip list statement that write_list will handle + } + + return false; +} + +static SerdStatus +write_range_statement(const SerdSink* const sink, + const SerdModel* const model, + ZixHash* const list_subjects, + const unsigned depth, + SerdStatementFlags flags, + const SerdStatement* SERD_NONNULL statement) +{ + const SerdNode* const subject = serd_statement_subject(statement); + const NodeStyle subject_style = get_node_style(model, subject); + const SerdNode* const object = serd_statement_object(statement); + const NodeStyle object_style = get_node_style(model, object); + const SerdNode* const graph = serd_statement_graph(statement); + SerdStatus st = SERD_SUCCESS; + + if (subject_style == ANON_S) { // Write anonymous subject like "[] p o" + flags |= SERD_EMPTY_S; + } + + if (depth == 0u) { + if (skip_range_statement(model, statement)) { + return SERD_SUCCESS; // Skip subject that will be inlined elsewhere + } + + if (subject_style == LIST_S) { + // First write inline list subject, which this statement will follow + if (zix_hash_insert(list_subjects, subject) != ZIX_STATUS_EXISTS) { + st = write_list( + sink, model, list_subjects, 2, SERD_LIST_S, subject, graph); + } + } + } + + if (st) { + return st; + } + + if (object_style == ANON_O) { // Write anonymous object like "[ ... ]" + SerdCursor* const iter = serd_model_find(model, object, NULL, NULL, NULL); + + flags |= SERD_ANON_O; + if (!(st = serd_sink_write_statement(sink, flags, statement))) { + if (!(st = write_pretty_range(sink, depth + 1, model, iter))) { + st = serd_sink_write_end(sink, object); + } + } + + serd_cursor_free(iter); + + } else if (object_style == LIST_O) { // Write list object like "( ... )" + flags |= SERD_LIST_O; + if (!(st = serd_sink_write_statement(sink, flags, statement))) { + st = write_list(sink, model, list_subjects, depth + 1, 0, object, graph); + } + + } else { + st = serd_sink_write_statement(sink, flags, statement); + } + + return st; +} + +SerdStatus +serd_describe_range(const SerdCursor* const range, + const SerdSink* sink, + const SerdDescribeFlags flags) +{ + SerdStatus st = SERD_SUCCESS; + + if (serd_cursor_is_end(range)) { + return st; + } + + SerdCursor copy = *range; + + if (flags & SERD_NO_INLINE_OBJECTS) { + const SerdStatement* statement = NULL; + while (!st && (statement = serd_cursor_get(©))) { + if (!(st = serd_sink_write_statement(sink, 0, statement))) { + st = serd_cursor_advance(©); + } + } + } else { + st = write_pretty_range(sink, 0, range->model, ©); + } + + return st; +} diff --git a/src/inserter.c b/src/inserter.c new file mode 100644 index 00000000..f5fd6ef1 --- /dev/null +++ b/src/inserter.c @@ -0,0 +1,122 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "model.h" +#include "statement.h" + +#include "serd/serd.h" + +#include <stdbool.h> +#include <stdlib.h> + +typedef struct { + SerdModel* model; + const SerdNode* default_graph; +} SerdInserterData; + +static bool +can_insert(SerdWorld* const world, const SerdNode* const node) +{ + if (node) { + switch (serd_node_type(node)) { + case SERD_LITERAL: + return can_insert(world, serd_node_datatype(node)); + + case SERD_URI: + if (!serd_uri_string_has_scheme(serd_node_string(node))) { + serd_logf(world, + SERD_LOG_LEVEL_ERROR, + "attempt to insert relative URI <%s> into model", + serd_node_string(node)); + return false; + } + break; + + case SERD_BLANK: + case SERD_VARIABLE: + break; + } + } + + return true; +} + +static SerdStatus +serd_inserter_on_statement(SerdInserterData* const data, + const SerdStatementFlags flags, + const SerdStatement* const statement) +{ + (void)flags; + + SerdModel* const model = data->model; + SerdWorld* const world = model->world; + + // Check that every node is expanded so it is context-free + for (unsigned i = 0; i < 4; ++i) { + if (!can_insert(world, serd_statement_node(statement, (SerdField)i))) { + return SERD_ERR_BAD_ARG; + } + } + + const SerdNode* const s = + serd_nodes_intern(model->nodes, serd_statement_subject(statement)); + + const SerdNode* const p = + serd_nodes_intern(model->nodes, serd_statement_predicate(statement)); + + const SerdNode* const o = + serd_nodes_intern(model->nodes, serd_statement_object(statement)); + + const SerdNode* const g = serd_nodes_intern( + model->nodes, + serd_statement_graph(statement) ? serd_statement_graph(statement) + : data->default_graph); + + const SerdCaret* const caret = + (data->model->flags & SERD_STORE_CARETS) ? statement->caret : NULL; + + const SerdStatus st = + serd_model_add_with_caret(data->model, s, p, o, g, caret); + + return st > SERD_FAILURE ? st : SERD_SUCCESS; +} + +static SerdStatus +serd_inserter_on_event(SerdInserterData* const data, + const SerdEvent* const event) +{ + if (event->type == SERD_STATEMENT) { + return serd_inserter_on_statement( + data, event->statement.flags, event->statement.statement); + } + + return SERD_SUCCESS; +} + +SerdSink* +serd_inserter_new(SerdModel* const model, const SerdNode* const default_graph) +{ + SerdInserterData* const data = + (SerdInserterData*)calloc(1, sizeof(SerdInserterData)); + + data->model = model; + data->default_graph = serd_node_copy(default_graph); + + SerdSink* const sink = + serd_sink_new(data, (SerdEventFunc)serd_inserter_on_event, free); + + return sink; +} diff --git a/src/model.c b/src/model.c new file mode 100644 index 00000000..0485b509 --- /dev/null +++ b/src/model.c @@ -0,0 +1,728 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "model.h" + +#include "caret.h" +#include "compare.h" +#include "cursor.h" +#include "statement.h" + +#include "zix/btree.h" +#include "zix/common.h" + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> + +static const SerdQuad everything_pattern = {0, 0, 0, 0}; + +/// A 3-bit signature for a triple pattern used as a table index +typedef enum { + SERD_SIGNATURE_XXX, // 000 (???) + SERD_SIGNATURE_XXO, // 001 (??O) + SERD_SIGNATURE_XPX, // 010 (?P?) + SERD_SIGNATURE_XPO, // 011 (?PO) + SERD_SIGNATURE_SXX, // 100 (S??) + SERD_SIGNATURE_SXO, // 101 (S?O) + SERD_SIGNATURE_SPX, // 110 (SP?) + SERD_SIGNATURE_SPO, // 111 (SPO) +} SerdPatternSignature; + +static ZixComparator +serd_model_index_comparator(const SerdModel* const model, + const SerdStatementOrder order) +{ + return (order < SERD_ORDER_GSPO && !(model->flags & SERD_STORE_GRAPHS)) + ? serd_triple_compare + : serd_quad_compare; +} + +static ZixComparator +serd_model_pattern_comparator(const SerdModel* const model, + const SerdStatementOrder order) +{ + return (order < SERD_ORDER_GSPO && !(model->flags & SERD_STORE_GRAPHS)) + ? serd_triple_compare_pattern + : serd_quad_compare_pattern; +} + +SerdStatus +serd_model_add_index(SerdModel* const model, const SerdStatementOrder order) +{ + if (model->indices[order]) { + return SERD_FAILURE; + } + + const unsigned* const ordering = orderings[order]; + const ZixComparator comparator = serd_model_index_comparator(model, order); + + model->indices[order] = zix_btree_new(comparator, ordering); + + ZixStatus zst = model->indices[order] ? ZIX_STATUS_SUCCESS : ZIX_STATUS_ERROR; + + // Insert statements from the default index + if (order != model->default_order) { + ZixBTree* const default_index = model->indices[model->default_order]; + for (ZixBTreeIter i = zix_btree_begin(default_index); + !zst && !zix_btree_iter_is_end(i); + zix_btree_iter_increment(&i)) { + zst = zix_btree_insert(model->indices[order], zix_btree_get(i)); + } + } + + return zst ? serd_model_drop_index(model, order) : SERD_SUCCESS; +} + +SerdStatus +serd_model_drop_index(SerdModel* const model, const SerdStatementOrder order) +{ + if (!model->indices[order]) { + return SERD_FAILURE; + } + + if (order == model->default_order) { + return SERD_ERR_BAD_CALL; + } + + zix_btree_free(model->indices[order], NULL); + model->indices[order] = NULL; + return SERD_SUCCESS; +} + +SerdModel* +serd_model_new(SerdWorld* const world, + const SerdStatementOrder default_order, + const SerdModelFlags flags) +{ + SerdModel* model = (SerdModel*)calloc(1, sizeof(struct SerdModelImpl)); + + model->world = world; + model->nodes = serd_nodes_new(); + model->default_order = default_order; + model->flags = flags; + + serd_model_add_index(model, default_order); + + const ScanStrategy end_strategy = {SCAN_EVERYTHING, 0u, default_order}; + + model->end = serd_cursor_make(model, + zix_btree_end(model->indices[default_order]), + everything_pattern, + end_strategy); + + return model; +} + +SerdModel* +serd_model_copy(const SerdModel* const model) +{ + SerdModel* copy = + serd_model_new(model->world, model->default_order, model->flags); + + SerdCursor* cursor = serd_model_begin(model); + serd_model_insert_statements(copy, cursor); + serd_cursor_free(cursor); + + assert(serd_model_size(model) == serd_model_size(copy)); + assert(serd_nodes_size(model->nodes) == serd_nodes_size(copy->nodes)); + return copy; +} + +static void +log_bad_index(const SerdModel* const model, + const char* const description, + const SerdStatementOrder index_order, + const bool s, + const bool p, + const bool o, + const bool g) +{ + static const char* const order_strings[] = { + "S P O G", + "S O P G", + "O P S G", + "O S P G", + "P S O G", + "P O S G", + "G S P O", + "G S O P", + "G O P S", + "G O S P", + "G P S O", + "G P O S", + }; + + serd_logf(model->world, + SERD_LOG_LEVEL_WARNING, + "%s index (%s) for (%s %s %s%s) query", + description, + order_strings[index_order], + s ? "S" : "?", + p ? "P" : "?", + o ? "O" : "?", + (model->flags & SERD_STORE_GRAPHS) ? g ? " G" : " ?" : ""); +} + +static SerdCursor +make_begin_cursor(const SerdModel* const model, const SerdStatementOrder order) +{ + if (!model->indices[order]) { + log_bad_index(model, "missing", order, false, false, false, false); + return model->end; + } + + const ScanStrategy strategy = {SCAN_EVERYTHING, 0u, order}; + + return serd_cursor_make(model, + zix_btree_begin(model->indices[order]), + everything_pattern, + strategy); +} + +bool +serd_model_equals(const SerdModel* const a, const SerdModel* const b) +{ + if (!a && !b) { + return true; + } + + if (!a || !b || serd_model_size(a) != serd_model_size(b)) { + return false; + } + + SerdCursor ia = make_begin_cursor(a, a->default_order); + SerdCursor ib = make_begin_cursor(b, b->default_order); + + while (!serd_cursor_is_end(&ia) && !serd_cursor_is_end(&ib)) { + if (!serd_statement_equals(serd_cursor_get(&ia), serd_cursor_get(&ib)) || + serd_cursor_advance(&ia) > SERD_FAILURE || + serd_cursor_advance(&ib) > SERD_FAILURE) { + return false; + } + } + + return serd_cursor_is_end(&ia) && serd_cursor_is_end(&ib); +} + +static void +serd_model_drop_statement(SerdModel* const model, + SerdStatement* const statement) +{ + assert(statement); + + for (unsigned i = 0u; i < 4; ++i) { + if (statement->nodes[i]) { + serd_nodes_deref(model->nodes, statement->nodes[i]); + } + } + + if (statement->caret && statement->caret->file) { + serd_nodes_deref(model->nodes, statement->caret->file); + } + + serd_statement_free(statement); +} + +void +serd_model_free(SerdModel* const model) +{ + if (!model) { + return; + } + + // Free all statements (which are owned by the default index) + ZixBTree* const default_index = model->indices[model->default_order]; + zix_btree_clear(default_index, (ZixDestroyFunc)serd_statement_free); + + // Free indices themselves + for (unsigned i = 0u; i < N_STATEMENT_ORDERS; ++i) { + zix_btree_free(model->indices[i], NULL); + } + + serd_nodes_free(model->nodes); + free(model); +} + +SerdWorld* +serd_model_world(SerdModel* const model) +{ + return model->world; +} + +const SerdNodes* +serd_model_nodes(const SerdModel* const model) +{ + return model->nodes; +} + +SerdStatementOrder +serd_model_default_order(const SerdModel* const model) +{ + return model->default_order; +} + +SerdModelFlags +serd_model_flags(const SerdModel* const model) +{ + return model->flags; +} + +size_t +serd_model_size(const SerdModel* const model) +{ + return zix_btree_size(model->indices[model->default_order]); +} + +bool +serd_model_empty(const SerdModel* const model) +{ + return serd_model_size(model) == 0; +} + +SerdCursor* +serd_model_begin_ordered(const SerdModel* const model, + const SerdStatementOrder order) +{ + const SerdCursor cursor = make_begin_cursor(model, order); + + return serd_cursor_copy(&cursor); +} + +SerdCursor* +serd_model_begin(const SerdModel* const model) +{ + return serd_model_begin_ordered(model, model->default_order); +} + +const SerdCursor* +serd_model_end(const SerdModel* const model) +{ + return &model->end; +} + +static SerdStatementOrder +simple_order(const SerdStatementOrder order) +{ + return order < SERD_ORDER_GSPO ? order : (SerdStatementOrder)(order - 6u); +} + +/// Return the best index scanning strategy for a pattern with given nodes +static ScanStrategy +serd_model_strategy(const SerdModel* const model, + const bool with_s, + const bool with_p, + const bool with_o, + const bool with_g) +{ +#define N_STRATEGY_TIERS 4u + + const SerdStatementOrder default_order = simple_order(model->default_order); + const ScanStrategy none = {SCAN_EVERYTHING, 0u, default_order}; + + const ScanStrategy strategies[N_STRATEGY_TIERS][8] = { + // Preferred perfect strategies: SPO, SOP, OPS, PSO + { + none, + {SCAN_RANGE, 1u, SERD_ORDER_OPS}, // ??O + {SCAN_RANGE, 1u, SERD_ORDER_PSO}, // ?P? + {SCAN_RANGE, 2u, SERD_ORDER_OPS}, // ?PO + {SCAN_RANGE, 1u, SERD_ORDER_SPO}, // S?? + {SCAN_RANGE, 2u, SERD_ORDER_SOP}, // S?O + {SCAN_RANGE, 2u, SERD_ORDER_SPO}, // SP? + {SCAN_RANGE, 3u, default_order} // SPO + }, + + // Alternate perfect strategies: adds OSP and POS + { + none, // ??? + {SCAN_RANGE, 1u, SERD_ORDER_OSP}, // ??O + {SCAN_RANGE, 1u, SERD_ORDER_POS}, // ?P? + {SCAN_RANGE, 2u, SERD_ORDER_POS}, // ?PO + {SCAN_RANGE, 1u, SERD_ORDER_SOP}, // S?? + {SCAN_RANGE, 2u, SERD_ORDER_OSP}, // S?O + {SCAN_RANGE, 2u, SERD_ORDER_PSO}, // SP? + none // SPO + }, + + // Preferred partial prefix strategies + { + none, // ??? + none, // ??O + none, // ?P? + {FILTER_RANGE, 1u, SERD_ORDER_PSO}, // ?PO + none, // S?? + {FILTER_RANGE, 1u, SERD_ORDER_SPO}, // S?O + {FILTER_RANGE, 1u, SERD_ORDER_SOP}, // SP? + none // SPO + }, + + // Alternate partial prefix strategies + { + none, // ??? + none, // ??O + none, // ?P? + {FILTER_RANGE, 1u, SERD_ORDER_OSP}, // ?PO + none, // S?? + {FILTER_RANGE, 1u, SERD_ORDER_OPS}, // S?O + {FILTER_RANGE, 1u, SERD_ORDER_POS}, // SP? + none // SPO + }, + }; + + // Construct a 3-bit signature for this triple pattern + const SerdPatternSignature sig = + (SerdPatternSignature)(((with_s ? 1u : 0u) << 2u) + // + ((with_p ? 1u : 0u) << 1u) + // + ((with_o ? 1u : 0u))); + + if (!sig && !with_g) { + return none; + } + + // Try to find a strategy we can support, from most to least preferred + for (unsigned t = 0u; t < N_STRATEGY_TIERS; ++t) { + ScanStrategy strategy = strategies[t][sig]; + const SerdStatementOrder triple_order = strategy.order; + + assert(strategy.order < SERD_ORDER_GSPO); + + if (strategy.n_prefix > 0) { + if (with_g) { + const SerdStatementOrder quad_order = + (SerdStatementOrder)(triple_order + 6u); + + if (model->indices[quad_order]) { + strategy.order = quad_order; + ++strategy.n_prefix; + return strategy; + } + } + + if (model->indices[triple_order]) { + return strategy; + } + } + } + + // Indices don't help with the triple, try to at least stay in the graph + if (with_g) { + for (unsigned i = SERD_ORDER_GSPO; i <= SERD_ORDER_GPOS; ++i) { + if (model->indices[i]) { + const ScanMode mode = sig == 0x000 ? SCAN_RANGE : FILTER_RANGE; + const ScanStrategy strategy = {mode, 1u, (SerdStatementOrder)i}; + + return strategy; + } + } + } + + // All is lost, regress to linear search + const ScanStrategy linear = {FILTER_EVERYTHING, 0u, model->default_order}; + return linear; +} + +static SerdCursor +serd_model_search(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + // Build a pattern of interned nodes + const SerdQuad pattern = {serd_nodes_get(model->nodes, s), + serd_nodes_get(model->nodes, p), + serd_nodes_get(model->nodes, o), + serd_nodes_get(model->nodes, g)}; + + // If some node isn't in the model at all, no need to search for statements + const int n_given = !!s + !!p + !!o + !!g; + if (n_given != (!!pattern[0] + !!pattern[1] + !!pattern[2] + !!pattern[3])) { + return model->end; + } + + // Determine the best available search strategy + const ScanStrategy strategy = serd_model_strategy(model, s, p, o, g); + const SerdStatementOrder order = strategy.order; + ZixBTree* const index = model->indices[order]; + + // Issue any performance warnings, and return early for degenerate cases + switch (strategy.mode) { + case SCAN_EVERYTHING: + return make_begin_cursor(model, order); + case SCAN_RANGE: + break; + case FILTER_EVERYTHING: + log_bad_index(model, "using effectively linear", order, s, p, o, g); + return serd_cursor_make(model, zix_btree_begin(index), pattern, strategy); + case FILTER_RANGE: + log_bad_index(model, "filtering partial", order, s, p, o, g); + break; + } + + // Find the first statement matching the pattern in the index + ZixBTreeIter iter = zix_btree_end_iter; + zix_btree_lower_bound(index, + serd_model_pattern_comparator(model, order), + orderings[order], + pattern, + &iter); + + return (!zix_btree_iter_is_end(iter) && + serd_iter_in_range(iter, pattern, strategy)) + ? serd_cursor_make(model, iter, pattern, strategy) + : model->end; +} + +SerdCursor* +serd_model_find(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + const SerdCursor cursor = serd_model_search(model, s, p, o, g); + + return zix_btree_iter_is_end(cursor.iter) ? NULL : serd_cursor_copy(&cursor); +} + +const SerdNode* +serd_model_get(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + const SerdStatement* const statement = + serd_model_get_statement(model, s, p, o, g); + + return !statement ? NULL + : !s ? serd_statement_subject(statement) + : !p ? serd_statement_predicate(statement) + : !o ? serd_statement_object(statement) + : !g ? serd_statement_graph(statement) + : NULL; +} + +const SerdStatement* +serd_model_get_statement(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + if ((bool)s + (bool)p + (bool)o != 2 && + (bool)s + (bool)p + (bool)o + (bool)g != 3) { + return NULL; + } + + const SerdCursor i = serd_model_search(model, s, p, o, g); + + return serd_cursor_get(&i); +} + +size_t +serd_model_count(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + SerdCursor i = serd_model_search(model, s, p, o, g); + size_t count = 0; + + for (; !serd_cursor_is_end(&i); serd_cursor_advance(&i)) { + ++count; + } + + return count; +} + +bool +serd_model_ask(const SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + const SerdCursor c = serd_model_search(model, s, p, o, g); + + return !serd_cursor_is_end(&c); +} + +static SerdCaret* +serd_model_intern_caret(SerdModel* const model, const SerdCaret* const caret) +{ + if (caret) { + SerdCaret* copy = (SerdCaret*)calloc(1, sizeof(SerdCaret)); + + copy->file = serd_nodes_intern(model->nodes, caret->file); + copy->line = caret->line; + copy->col = caret->col; + return copy; + } + + return NULL; +} + +SerdStatus +serd_model_add_with_caret(SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g, + const SerdCaret* const caret) +{ + SerdStatement* const statement = serd_statement_new(s, p, o, g, NULL); + if (!statement) { + return SERD_ERR_UNKNOWN; + } + + statement->caret = serd_model_intern_caret(model, caret); + + bool added = false; + for (unsigned i = 0u; i < N_STATEMENT_ORDERS; ++i) { + if (model->indices[i]) { + if (!zix_btree_insert(model->indices[i], statement)) { + added = true; + } else if ((SerdStatementOrder)i == model->default_order) { + break; // Statement already indexed + } + } + } + + ++model->version; + if (added) { + return SERD_SUCCESS; + } + + serd_model_drop_statement(model, statement); + return SERD_FAILURE; +} + +SerdStatus +serd_model_add(SerdModel* const model, + const SerdNode* const s, + const SerdNode* const p, + const SerdNode* const o, + const SerdNode* const g) +{ + return serd_model_add_with_caret(model, + serd_nodes_intern(model->nodes, s), + serd_nodes_intern(model->nodes, p), + serd_nodes_intern(model->nodes, o), + serd_nodes_intern(model->nodes, g), + NULL); +} + +SerdStatus +serd_model_insert(SerdModel* const model, const SerdStatement* const statement) +{ + SerdNodes* const nodes = model->nodes; + + return serd_model_add_with_caret( + model, + serd_nodes_intern(nodes, serd_statement_subject(statement)), + serd_nodes_intern(nodes, serd_statement_predicate(statement)), + serd_nodes_intern(nodes, serd_statement_object(statement)), + serd_nodes_intern(nodes, serd_statement_graph(statement)), + serd_statement_caret(statement)); +} + +SerdStatus +serd_model_insert_statements(SerdModel* const model, SerdCursor* const range) +{ + const SerdStatement* statement = serd_cursor_get(range); + SerdStatus st = SERD_SUCCESS; + + while (!st && statement) { + if (!(st = serd_model_insert(model, statement))) { + st = serd_cursor_advance(range); + } + + statement = serd_cursor_get(range); + } + + return st; +} + +SerdStatus +serd_model_erase(SerdModel* const model, SerdCursor* const cursor) +{ + const SerdStatement* statement = serd_cursor_get(cursor); + SerdStatement* removed = NULL; + ZixStatus zst = ZIX_STATUS_SUCCESS; + + if (!statement) { + return SERD_FAILURE; + } + + // Erase from the index associated with this cursor + zst = zix_btree_remove(model->indices[cursor->strategy.order], + statement, + (void**)&removed, + &cursor->iter); + + // No error should be possible with a valid cursor + assert(!zst); + assert(removed); + assert(removed == statement); + + for (unsigned i = SERD_ORDER_SPO; i <= SERD_ORDER_GPOS; ++i) { + ZixBTree* index = model->indices[i]; + + if (index && i != (unsigned)cursor->strategy.order) { + SerdStatement* ignored = NULL; + ZixBTreeIter next = zix_btree_end_iter; + zst = zix_btree_remove(index, statement, (void**)&ignored, &next); + + assert(zst == ZIX_STATUS_SUCCESS || zst == ZIX_STATUS_NOT_FOUND); + assert(!ignored || ignored == removed); + } + } + serd_cursor_scan_next(cursor); + + serd_model_drop_statement(model, removed); + cursor->version = ++model->version; + + (void)zst; + return SERD_SUCCESS; +} + +SerdStatus +serd_model_erase_statements(SerdModel* const model, SerdCursor* const range) +{ + SerdStatus st = SERD_SUCCESS; + + while (!st && !serd_cursor_is_end(range)) { + st = serd_model_erase(model, range); + } + + return st; +} + +SerdStatus +serd_model_clear(SerdModel* const model) +{ + SerdCursor i = make_begin_cursor(model, model->default_order); + + while (!serd_cursor_is_end(&i)) { + serd_model_erase(model, &i); + } + + return SERD_SUCCESS; +} diff --git a/src/model.h b/src/model.h new file mode 100644 index 00000000..5fbf6209 --- /dev/null +++ b/src/model.h @@ -0,0 +1,37 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_MODEL_H +#define SERD_MODEL_H + +#include "cursor.h" + +#include "serd/serd.h" +#include "zix/btree.h" + +#include <stddef.h> + +struct SerdModelImpl { + SerdWorld* world; ///< World this model is a part of + SerdNodes* nodes; ///< Interned nodes in this model + ZixBTree* indices[12]; ///< Trees of SerdStatement pointers + SerdCursor end; ///< End cursor (always the same) + size_t version; ///< Version incremented on every change + SerdStatementOrder default_order; ///< Order for main statement-owning index + SerdModelFlags flags; ///< Active indices and features +}; + +#endif // SERD_MODEL_H diff --git a/src/reader.c b/src/reader.c index 5afc28ff..91e61f72 100644 --- a/src/reader.c +++ b/src/reader.c @@ -91,7 +91,7 @@ tolerate_status(const SerdReader* const reader, const SerdStatus status) if (status == SERD_ERR_INTERNAL || status == SERD_ERR_OVERFLOW || status == SERD_ERR_BAD_WRITE || status == SERD_ERR_NO_DATA || - status == SERD_ERR_BAD_CALL) { + status == SERD_ERR_BAD_CALL || status == SERD_ERR_BAD_CURSOR) { return false; } diff --git a/src/serdi.c b/src/serdi.c index 2acae921..97601b83 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -61,11 +61,12 @@ print_usage(const char* const name, const bool error) fprintf(os, " -b Fast bulk output for large serialisations.\n"); fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); fprintf(os, " -e Eat input one character at a time.\n"); - fprintf(os, " -f Keep full URIs in input (don't qualify).\n"); + fprintf(os, " -f Fast and loose mode (possibly ugly output).\n"); fprintf(os, " -h Display this help and exit.\n"); fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); fprintf(os, " -k BYTES Parser stack size.\n"); fprintf(os, " -l Lax (non-strict) parsing.\n"); + fprintf(os, " -m Build a model in memory before writing.\n"); fprintf(os, " -o SYNTAX Output syntax: empty/turtle/ntriples/nquads.\n"); fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); fprintf(os, " -q Suppress all output except data.\n"); @@ -149,7 +150,9 @@ main(int argc, char** argv) SerdWriterFlags writer_flags = 0; bool bulk_read = true; bool bulk_write = false; + bool no_inline = false; bool osyntax_set = false; + bool use_model = false; bool quiet = false; size_t stack_size = 4194304; const char* input_string = NULL; @@ -173,12 +176,15 @@ main(int argc, char** argv) } else if (opt == 'e') { bulk_read = false; } else if (opt == 'f') { + no_inline = true; writer_flags |= (SERD_WRITE_UNQUALIFIED | SERD_WRITE_UNRESOLVED); } else if (opt == 'h') { return print_usage(prog, false); } else if (opt == 'l') { reader_flags |= SERD_READ_LAX; writer_flags |= SERD_WRITE_LAX; + } else if (argv[a][1] == 'm') { + use_model = true; } else if (opt == 'q') { quiet = true; } else if (opt == 't') { @@ -312,6 +318,9 @@ main(int argc, char** argv) } #endif + const SerdDescribeFlags describe_flags = + no_inline ? SERD_NO_INLINE_OBJECTS : 0u; + const size_t block_size = bulk_write ? 4096u : 1u; SerdByteSink* const byte_sink = out_filename @@ -326,6 +335,30 @@ main(int argc, char** argv) SerdWriter* const writer = serd_writer_new(world, output_syntax, writer_flags, env, byte_sink); + SerdModel* model = NULL; + SerdSink* inserter = NULL; + const SerdSink* sink = NULL; + if (use_model) { + const SerdModelFlags flags = (input_has_graphs ? SERD_STORE_GRAPHS : 0u); + + model = serd_model_new(world, SERD_ORDER_SPO, flags); + if (input_has_graphs) { + serd_model_add_index(model, SERD_ORDER_GSPO); + } + + if (!no_inline) { + serd_model_add_index(model, SERD_ORDER_OPS); + if (input_has_graphs) { + serd_model_add_index(model, SERD_ORDER_GOPS); + } + } + + inserter = serd_inserter_new(model, NULL); + sink = inserter; + } else { + sink = serd_writer_sink(writer); + } + if (quiet) { serd_set_log_func(world, serd_quiet_log_func, NULL); } @@ -349,7 +382,7 @@ main(int argc, char** argv) input_syntax ? input_syntax : SERD_TRIG, reader_flags, env, - serd_writer_sink(writer), + sink, stack_size); serd_reader_add_blank_prefix(reader, add_prefix); @@ -394,7 +427,7 @@ main(int argc, char** argv) input_syntax, reader_flags, env, - serd_writer_sink(writer), + sink, stack_size, inputs[i], n_inputs > 1 ? prefix : add_prefix, @@ -404,6 +437,26 @@ main(int argc, char** argv) } free(prefix); + if (st <= SERD_FAILURE && use_model) { + const SerdSink* writer_sink = serd_writer_sink(writer); + SerdCursor* everything = serd_model_begin_ordered( + model, input_has_graphs ? SERD_ORDER_GSPO : SERD_ORDER_SPO); + + serd_env_write_prefixes(env, writer_sink); + + st = serd_describe_range( + everything, + writer_sink, + describe_flags | + ((output_syntax == SERD_NTRIPLES || output_syntax == SERD_NQUADS) + ? SERD_NO_INLINE_OBJECTS + : 0u)); + + serd_cursor_free(everything); + } + + serd_sink_free(inserter); + serd_model_free(model); serd_writer_free(writer); serd_node_free(input_name); serd_env_free(env); diff --git a/src/statement.c b/src/statement.c index d6571c6c..7bb2486f 100644 --- a/src/statement.c +++ b/src/statement.c @@ -50,7 +50,13 @@ serd_statement_new(const SerdNode* const s, const SerdNode* const g, const SerdCaret* const caret) { - assert(serd_statement_is_valid(s, p, o, g)); + assert(s); + assert(p); + assert(o); + + if (!serd_statement_is_valid(s, p, o, g)) { + return NULL; + } SerdStatement* statement = (SerdStatement*)malloc(sizeof(SerdStatement)); if (statement) { diff --git a/src/string.c b/src/string.c index 2d25fb80..11b53050 100644 --- a/src/string.c +++ b/src/string.c @@ -38,6 +38,8 @@ serd_strerror(const SerdStatus status) return "Invalid syntax"; case SERD_ERR_BAD_ARG: return "Invalid argument"; + case SERD_ERR_BAD_CURSOR: + return "Invalid cursor"; case SERD_ERR_NOT_FOUND: return "Not found"; case SERD_ERR_ID_CLASH: @@ -58,6 +60,8 @@ serd_strerror(const SerdStatus status) return "Invalid call"; case SERD_ERR_BAD_URI: return "Invalid or unresolved URI"; + case SERD_ERR_BAD_INDEX: + return "No optimal model index available"; } return "Unknown error"; |