diff options
-rw-r--r-- | sord/sord.h | 68 | ||||
-rw-r--r-- | src/sord.c | 116 | ||||
-rw-r--r-- | src/sord_test.c | 82 | ||||
-rw-r--r-- | src/sordi.c | 5 | ||||
-rw-r--r-- | src/syntax.c | 13 |
5 files changed, 168 insertions, 116 deletions
diff --git a/sord/sord.h b/sord/sord.h index 3412afa..b7dd49e 100644 --- a/sord/sord.h +++ b/sord/sord.h @@ -50,9 +50,10 @@ * @{ */ -typedef struct _Sord* Sord; ///< Quad store -typedef struct _SordIter* SordIter; ///< Store iterator -typedef struct _SordNode* SordNode; ///< Node +typedef struct _SordWorld* SordWorld; ///< Sord world (library state) +typedef struct _Sord* Sord; ///< Quad store +typedef struct _SordIter* SordIter; ///< Store iterator +typedef struct _SordNode* SordNode; ///< Node /** Quad of IDs (statement), or a quad pattern. * Nodes are ordered (S P O G). The ID of the default graph is 0. @@ -84,26 +85,17 @@ typedef enum { SORD_POS = 1 << 5 ///< Predicate, Object, Subject } SordIndexOption; -/** @name Initialisation and Cleanup +/** @name World * @{ */ -/** Create a new store. - * @param indices SordIndexOption flags (e.g. SORD_SPO|SORD_OPS). - * Be sure to choose indices such that there is an index where the most - * significant node(s) are not variables for your queries. For example, - * if you are going to make (? P O) queries, you should enable either - * SORD_OPS or SORD_POS. - * @param graphs If true, store (and index) graph contexts. - */ SORD_API -Sord -sord_new(unsigned indices, bool graphs); +SordWorld +sord_world_new(void); -/** Close and free @a sord. */ SORD_API void -sord_free(Sord sord); +sord_world_free(SordWorld world); /** @} */ /** @name Nodes @@ -118,37 +110,37 @@ sord_free(Sord sord); */ SORD_API SordNode -sord_new_uri(Sord sord, const uint8_t* str); +sord_new_uri(SordWorld world, const uint8_t* str); /** Find a URI, creating a new one if necessary iff @a create is true. */ SORD_API SordNode -sord_new_uri_counted(Sord sord, const uint8_t* str, int str_len); +sord_new_uri_counted(SordWorld world, const uint8_t* str, int str_len); /** Find a blank, creating a new one if necessary iff @a create is true * Use sord_get_blank_counted instead if the length of @a str is known. */ SORD_API SordNode -sord_new_blank(Sord sord, const uint8_t* str); +sord_new_blank(SordWorld world, const uint8_t* str); /** Find a blank, creating a new one if necessary iff @a create is true. */ SORD_API SordNode -sord_new_blank_counted(Sord sord, const uint8_t* str, int str_len); +sord_new_blank_counted(SordWorld world, const uint8_t* str, int str_len); /** Find a literal, creating a new one if necessary iff @a create is true. * Use sord_get_literal_counted instead if the length of @a str is known. */ SORD_API SordNode -sord_new_literal(Sord sord, SordNode datatype, +sord_new_literal(SordWorld world, SordNode datatype, const uint8_t* str, const char* lang); /** Find a literal, creating a new one if necessary iff @a create is true. */ SORD_API SordNode -sord_new_literal_counted(Sord sord, SordNode datatype, +sord_new_literal_counted(SordWorld world, SordNode datatype, const uint8_t* str, int str_len, const char* lang, uint8_t lang_len); @@ -185,16 +177,38 @@ bool sord_node_equals(const SordNode a, const SordNode b); /** @} */ -/** @name Read Operations +/** @name Store + * For brevity, the Sord store is simply referred to as a "Sord". * @{ */ +/** Create a new store. + * @param indices SordIndexOption flags (e.g. SORD_SPO|SORD_OPS). + * Be sure to choose indices such that there is an index where the most + * significant node(s) are not variables for your queries. For example, + * if you are going to make (? P O) queries, you should enable either + * SORD_OPS or SORD_POS. + * @param graphs If true, store (and index) graph contexts. + */ +SORD_API +Sord +sord_new(SordWorld world, unsigned indices, bool graphs); + +/** Close and free @a sord. */ +SORD_API +void +sord_free(Sord sord); + +SORD_API +SordWorld +sord_get_world(Sord sord); + /** Return the number of nodes stored in @a sord. * Nodes are included in this count iff they are a part of a quad in @a sord. */ SORD_API int -sord_num_nodes(Sord read); +sord_num_nodes(SordWorld world); /** Return the number of quads stored in @a sord. */ SORD_API @@ -218,12 +232,6 @@ SORD_API SordIter sord_find(Sord sord, const SordQuad pat); - -/** @} */ -/** @name Write Operations - * @{ - */ - /** Add a quad to the store. */ SORD_API void @@ -106,10 +106,16 @@ static const int orderings[NUM_ORDERS][TUP_LEN] = { {3,0,1,2 }, {3,0,2,1 }, {3,2,1,0 }, {3,2,0,1 }, {3,1,0,2 }, {3,1,2,0 } }; -/** Store */ -struct _Sord { +/** World */ +struct _SordWorld { GHashTable* names; ///< URI or blank node identifier string => ID GHashTable* literals; ///< Literal => ID + SordCount n_nodes; ///< Number of nodes +}; + +/** Store */ +struct _Sord { + SordWorld world; /** Index for each possible triple ordering (may or may not exist). * If an index for e.g. SPO exists, it is a dictionary with @@ -118,7 +124,6 @@ struct _Sord { GSequence* indices[NUM_ORDERS]; SordCount n_quads; - SordCount n_nodes; }; /** Mode for searching or iteration */ @@ -159,6 +164,24 @@ sord_literal_equal(const void* a, const void* b) sord_node_get_string(b_node)); } +SordWorld +sord_world_new(void) +{ + SordWorld world = malloc(sizeof(struct _SordWorld)); + world->names = g_hash_table_new_full(g_str_hash, g_str_equal, free, 0); + world->literals = g_hash_table_new_full(sord_literal_hash, sord_literal_equal, 0, 0); + world->n_nodes = 0; + return world; +} + +void +sord_world_free(SordWorld world) +{ + g_hash_table_unref(world->names); + g_hash_table_unref(world->literals); + free(world); +} + static inline int sord_node_compare(const SordNode a, const SordNode b) { @@ -522,13 +545,11 @@ sord_best_index(Sord sord, const SordQuad pat, SearchMode* mode, int* n_prefix) } Sord -sord_new(unsigned indices, bool graphs) +sord_new(SordWorld world, unsigned indices, bool graphs) { Sord sord = (Sord)malloc(sizeof(struct _Sord)); - sord->names = g_hash_table_new_full(g_str_hash, g_str_equal, free, 0); - sord->literals = g_hash_table_new_full(sord_literal_hash, sord_literal_equal, 0, 0); - sord->n_quads = 0; - sord->n_nodes = 0; + sord->world = world; + sord->n_quads = 0; for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) { if (indices & (1 << i)) { @@ -562,6 +583,18 @@ sord_add_quad_ref(Sord sord, const SordNode node) static void sord_drop_node(Sord sord, SordNode node) { + SordWorld world = sord_get_world(sord); + if (node->type == SORD_LITERAL) { + if (!g_hash_table_remove(world->literals, node)) { + fprintf(stderr, "Failed to remove literal from hash, leak!\n"); + return; + } + } else { + if (!g_hash_table_remove(world->names, node->buf)) { + fprintf(stderr, "Failed to remove resource from hash, leak!\n"); + return; + } + } free(node->buf); free(node); } @@ -593,8 +626,6 @@ sord_free(Sord sord) } sord_iter_free(i); - g_hash_table_unref(sord->names); - g_hash_table_unref(sord->literals); for (unsigned i = 0; i < NUM_ORDERS; ++i) if (sord->indices[i]) g_sequence_free(sord->indices[i]); @@ -602,6 +633,12 @@ sord_free(Sord sord) free(sord); } +SordWorld +sord_get_world(Sord sord) +{ + return sord->world; +} + int sord_num_quads(Sord sord) { @@ -609,9 +646,9 @@ sord_num_quads(Sord sord) } int -sord_num_nodes(Sord sord) +sord_num_nodes(SordWorld world) { - return sord->n_nodes; + return world->n_nodes; } SordIter @@ -728,9 +765,9 @@ sord_find(Sord sord, const SordQuad pat) } static SordNode -sord_lookup_name(Sord sord, const uint8_t* str, size_t str_len) +sord_lookup_name(SordWorld world, const uint8_t* str, size_t str_len) { - return g_hash_table_lookup(sord->names, str); + return g_hash_table_lookup(world->names, str); } static SordNode @@ -747,7 +784,7 @@ sord_new_node(SordNodeType type, const uint8_t* data, size_t n_bytes) } static SordNode -sord_new_literal_node(Sord sord, SordNode datatype, +sord_new_literal_node(SordNode datatype, const uint8_t* str, int str_len, const char* lang, uint8_t lang_len) { @@ -758,12 +795,13 @@ sord_new_literal_node(Sord sord, SordNode datatype, } static SordNode -sord_lookup_literal(Sord sord, SordNode type, +sord_lookup_literal(SordWorld world, SordNode type, const uint8_t* str, int str_len, const char* lang, uint8_t lang_len) { - SordNode node = sord_new_literal_node(sord, type, str, str_len, lang, lang_len); - SordNode id = g_hash_table_lookup(sord->literals, node); + // FIXME: double alloc, ick + SordNode node = sord_new_literal_node(type, str, str_len, lang, lang_len); + SordNode id = g_hash_table_lookup(world->literals, node); free(node); if (id) { return id; @@ -804,73 +842,73 @@ sord_node_get_datatype(SordNode ref) } static void -sord_add_node(Sord sord, SordNode node) +sord_add_node(SordWorld world, SordNode node) { node->refs = 0; - ++sord->n_nodes; + ++world->n_nodes; } SordNode -sord_new_uri_counted(Sord sord, const uint8_t* str, int str_len) +sord_new_uri_counted(SordWorld world, const uint8_t* str, int str_len) { - SordNode node = sord_lookup_name(sord, str, str_len); + SordNode node = sord_lookup_name(world, str, str_len); if (node) { return node; } node = sord_new_node(SORD_URI, (const uint8_t*)str, str_len + 1); - g_hash_table_insert(sord->names, g_strdup((const char*)str), node); - sord_add_node(sord, node); + g_hash_table_insert(world->names, g_strdup((const char*)str), node); + sord_add_node(world, node); return node; } SordNode -sord_new_uri(Sord sord, const uint8_t* str) +sord_new_uri(SordWorld world, const uint8_t* str) { - return sord_new_uri_counted(sord, str, strlen((const char*)str)); + return sord_new_uri_counted(world, str, strlen((const char*)str)); } SordNode -sord_new_blank_counted(Sord sord, const uint8_t* str, int str_len) +sord_new_blank_counted(SordWorld world, const uint8_t* str, int str_len) { - SordNode node = sord_lookup_name(sord, str, str_len); + SordNode node = sord_lookup_name(world, str, str_len); if (node) { return node; } node = sord_new_node(SORD_BLANK, (const uint8_t*)str, str_len + 1); - g_hash_table_insert(sord->names, g_strdup((const char*)str), node); - sord_add_node(sord, node); + g_hash_table_insert(world->names, g_strdup((const char*)str), node); + sord_add_node(world, node); return node; } SordNode -sord_new_blank(Sord sord, const uint8_t* str) +sord_new_blank(SordWorld world, const uint8_t* str) { - return sord_new_blank_counted(sord, str, strlen((const char*)str)); + return sord_new_blank_counted(world, str, strlen((const char*)str)); } SordNode -sord_new_literal_counted(Sord sord, SordNode type, +sord_new_literal_counted(SordWorld world, SordNode type, const uint8_t* str, int str_len, const char* lang, uint8_t lang_len) { - SordNode node = sord_lookup_literal(sord, type, str, str_len, lang, lang_len); + SordNode node = sord_lookup_literal(world, type, str, str_len, lang, lang_len); if (node) { return node; } - node = sord_new_literal_node(sord, type, str, str_len, lang, lang_len); - g_hash_table_insert(sord->literals, node, node); // FIXME: correct? - sord_add_node(sord, node); + node = sord_new_literal_node(type, str, str_len, lang, lang_len); + g_hash_table_insert(world->literals, node, node); // FIXME: correct? + sord_add_node(world, node); return node; } SordNode -sord_new_literal(Sord sord, SordNode type, +sord_new_literal(SordWorld world, SordNode type, const uint8_t* str, const char* lang) { - return sord_new_literal_counted(sord, type, + return sord_new_literal_counted(world, type, str, strlen((const char*)str), lang, lang ? strlen(lang) : 0); } diff --git a/src/sord_test.c b/src/sord_test.c index 1172069..ae3280b 100644 --- a/src/sord_test.c +++ b/src/sord_test.c @@ -30,7 +30,7 @@ typedef struct { SordQuad query; int expected_num_results; } QueryTest; #define USTR(s) ((const uint8_t*)(s)) static SordNode -uri(Sord sord, int num) +uri(SordWorld world, int num) { if (num == 0) return 0; @@ -39,11 +39,11 @@ uri(Sord sord, int num) const size_t uri_len = 3 + DIGITS; char* uri_num = uri + 3; // First `0' snprintf(uri_num, DIGITS + 1, "%0*d", DIGITS, num); - return sord_new_uri_counted(sord, (const uint8_t*)uri, uri_len); + return sord_new_uri_counted(world, (const uint8_t*)uri, uri_len); } void -generate(Sord sord, size_t n_quads, size_t n_objects_per) +generate(SordWorld world, Sord sord, size_t n_quads, size_t n_objects_per) { fprintf(stderr, "Generating %zu (S P *) quads with %zu objects each\n", n_quads, n_objects_per); @@ -53,7 +53,7 @@ generate(Sord sord, size_t n_quads, size_t n_objects_per) SordNode ids[2 + n_objects_per]; for (size_t j = 0; j < 2 + n_objects_per; ++j) { - ids[j] = uri(sord, num++); + ids[j] = uri(world, num++); } for (size_t j = 0; j < n_objects_per; ++j) { @@ -64,18 +64,18 @@ generate(Sord sord, size_t n_quads, size_t n_objects_per) // Add some literals SordQuad tup; - tup[0] = uri(sord, 98); - tup[1] = uri(sord, 4); - tup[2] = sord_new_literal(sord, 0, (const uint8_t*)"hello", NULL); + tup[0] = uri(world, 98); + tup[1] = uri(world, 4); + tup[2] = sord_new_literal(world, 0, (const uint8_t*)"hello", NULL); tup[3] = 0; sord_add(sord, tup); - tup[2] = sord_new_literal(sord, 0, USTR("hi"), NULL); + tup[2] = sord_new_literal(world, 0, USTR("hi"), NULL); sord_add(sord, tup); - tup[0] = uri(sord, 14); - tup[2] = sord_new_literal(sord, 0, USTR("bonjour"), "fr"); + tup[0] = uri(world, 14); + tup[2] = sord_new_literal(world, 0, USTR("bonjour"), "fr"); sord_add(sord, tup); - tup[2] = sord_new_literal(sord, 0, USTR("salut"), "fr"); + tup[2] = sord_new_literal(world, 0, USTR("salut"), "fr"); sord_add(sord, tup); // Attempt to add some duplicates @@ -83,11 +83,11 @@ generate(Sord sord, size_t n_quads, size_t n_objects_per) sord_add(sord, tup); // Add a blank node subject - tup[0] = sord_new_blank(sord, USTR("ablank")); + tup[0] = sord_new_blank(world, USTR("ablank")); sord_add(sord, tup); - tup[1] = uri(sord, 6); - tup[2] = uri(sord, 7); + tup[1] = uri(world, 6); + tup[2] = uri(world, 7); sord_add(sord, tup); } @@ -105,7 +105,7 @@ test_fail() ((t)[2] ? sord_node_get_string((t)[2]) : USTR("*")) int -test_read(Sord sord, const size_t n_quads, const int n_objects_per) +test_read(SordWorld world, Sord sord, const size_t n_quads, const int n_objects_per) { int ret = EXIT_SUCCESS; @@ -132,14 +132,14 @@ test_read(Sord sord, const size_t n_quads, const int n_objects_per) QueryTest patterns[NUM_PATTERNS] = { { { 0, 0, 0 }, (n_quads * n_objects_per) + 6 }, - { { uri(sord, 9), uri(sord, 9), uri(sord, 9) }, 0 }, - { { uri(sord, 1), uri(sord, 2), uri(sord, 4) }, 1 }, - { { uri(sord, 3), uri(sord, 4), uri(sord, 0) }, 2 }, - { { uri(sord, 0), uri(sord, 2), uri(sord, 4) }, 1 }, - { { uri(sord, 0), uri(sord, 0), uri(sord, 4) }, 1 }, - { { uri(sord, 1), uri(sord, 0), uri(sord, 0) }, 2 }, - { { uri(sord, 1), uri(sord, 0), uri(sord, 4) }, 1 }, - { { uri(sord, 0), uri(sord, 2), uri(sord, 0) }, 2 } }; + { { uri(world, 9), uri(world, 9), uri(world, 9) }, 0 }, + { { uri(world, 1), uri(world, 2), uri(world, 4) }, 1 }, + { { uri(world, 3), uri(world, 4), uri(world, 0) }, 2 }, + { { uri(world, 0), uri(world, 2), uri(world, 4) }, 1 }, + { { uri(world, 0), uri(world, 0), uri(world, 4) }, 1 }, + { { uri(world, 1), uri(world, 0), uri(world, 0) }, 2 }, + { { uri(world, 1), uri(world, 0), uri(world, 4) }, 1 }, + { { uri(world, 0), uri(world, 2), uri(world, 0) }, 2 } }; for (unsigned i = 0; i < NUM_PATTERNS; ++i) { QueryTest test = patterns[i]; @@ -168,7 +168,7 @@ test_read(Sord sord, const size_t n_quads, const int n_objects_per) } // Query blank node subject - SordQuad pat = { sord_new_blank(sord, USTR("ablank")), 0, 0 }; + SordQuad pat = { sord_new_blank(world, USTR("ablank")), 0, 0 }; if (!pat[0]) { fprintf(stderr, "Blank node subject lost\n"); return test_fail(); @@ -264,23 +264,25 @@ main(int argc, char** argv) sord_free(NULL); // Shouldn't crash + SordWorld world = sord_world_new(); + // Create with minimal indexing - Sord sord = sord_new(SORD_SPO, false); - generate(sord, n_quads, n_objects_per); + Sord sord = sord_new(world, SORD_SPO, false); + generate(world, sord, n_quads, n_objects_per); - if (test_read(sord, n_quads, n_objects_per)) { + if (test_read(world, sord, n_quads, n_objects_per)) { sord_free(sord); return EXIT_FAILURE; } // Check interning merges equivalent values - SordNode uri_id = sord_new_uri(sord, USTR("http://example.org")); - SordNode blank_id = sord_new_uri(sord, USTR("testblank")); - SordNode lit_id = sord_new_literal(sord, uri_id, USTR("hello"), NULL); + SordNode uri_id = sord_new_uri(world, USTR("http://example.org")); + SordNode blank_id = sord_new_uri(world, USTR("testblank")); + SordNode lit_id = sord_new_literal(world, uri_id, USTR("hello"), NULL); //sord_clear_cache(write); - SordNode uri_id2 = sord_new_uri(sord, USTR("http://example.org")); - SordNode blank_id2 = sord_new_uri(sord, USTR("testblank")); - SordNode lit_id2 = sord_new_literal(sord, uri_id, USTR("hello"), NULL); + SordNode uri_id2 = sord_new_uri(world, USTR("http://example.org")); + SordNode blank_id2 = sord_new_uri(world, USTR("testblank")); + SordNode lit_id2 = sord_new_literal(world, uri_id, USTR("hello"), NULL); if (uri_id2 != uri_id) { fprintf(stderr, "Fail: URI interning failed (duplicates)\n"); goto fail; @@ -293,9 +295,9 @@ main(int argc, char** argv) } // Check interning doesn't clash non-equivalent values - SordNode uri_id3 = sord_new_uri(sord, USTR("http://example.orgX")); - SordNode blank_id3 = sord_new_uri(sord, USTR("testblankX")); - SordNode lit_id3 = sord_new_literal(sord, uri_id, USTR("helloX"), NULL); + SordNode uri_id3 = sord_new_uri(world, USTR("http://example.orgX")); + SordNode blank_id3 = sord_new_uri(world, USTR("testblankX")); + SordNode lit_id3 = sord_new_literal(world, uri_id, USTR("helloX"), NULL); if (uri_id3 == uri_id) { fprintf(stderr, "Fail: URI interning failed (clash)\n"); goto fail; @@ -314,15 +316,15 @@ main(int argc, char** argv) }; for (int i = 0; i < 6; ++i) { - sord = sord_new((1 << i), false); + sord = sord_new(world, (1 << i), false); printf("Testing Index `%s'\n", index_names[i]); - generate(sord, n_quads, n_objects_per); - if (test_read(sord, n_quads, n_objects_per)) + generate(world, sord, n_quads, n_objects_per); + if (test_read(world, sord, n_quads, n_objects_per)) goto fail; sord_free(sord); } - sord = sord_new(SORD_SPO, false); + sord = sord_new(world, SORD_SPO, false); if (test_write(sord, n_quads, n_objects_per)) goto fail; diff --git a/src/sordi.c b/src/sordi.c index ab2a10a..bf27e48 100644 --- a/src/sordi.c +++ b/src/sordi.c @@ -118,11 +118,12 @@ main(int argc, char** argv) const uint8_t* input = (const uint8_t*)argv[a++]; - Sord sord = sord_new(SORD_SPO|SORD_OPS, false); + SordWorld world = sord_world_new(); + Sord sord = sord_new(world, SORD_SPO|SORD_OPS, false); bool success = sord_read_file(sord, input, NULL, NULL); - printf("loaded %u statements\n", sord_num_nodes(sord)); + printf("loaded %u statements\n", sord_num_nodes(world)); SerdURI base_uri; if (!serd_uri_parse(input, &base_uri)) { diff --git a/src/syntax.c b/src/syntax.c index 1cff77b..7444fe5 100644 --- a/src/syntax.c +++ b/src/syntax.c @@ -32,6 +32,7 @@ typedef struct { SordNode graph_uri_node; SerdNode base_uri_node; SerdURI base_uri; + SordWorld world; Sord sord; } ReadState; @@ -99,12 +100,12 @@ sord_node_from_serd_node(ReadState* state, const SerdNode* sn) case SERD_NOTHING: return NULL; case SERD_LITERAL: - return sord_new_literal(state->sord, NULL, sn->buf, NULL); + return sord_new_literal(state->world, NULL, sn->buf, NULL); case SERD_URI: { SerdURI abs_uri; SerdNode abs_uri_node = serd_node_new_uri_from_node( sn, &state->base_uri, &abs_uri); - SordNode ret = sord_new_uri(state->sord, abs_uri_node.buf); + SordNode ret = sord_new_uri(state->world, abs_uri_node.buf); serd_node_free(&abs_uri_node); return ret; } @@ -120,7 +121,7 @@ sord_node_from_serd_node(ReadState* state, const SerdNode* sn) memcpy(buf, uri_prefix.buf, uri_prefix.len); memcpy(buf + uri_prefix.len, uri_suffix.buf, uri_suffix.len); buf[uri_len] = '\0'; - SordNode ret = sord_new_uri_counted(state->sord, + SordNode ret = sord_new_uri_counted(state->world, buf, uri_prefix.len + uri_suffix.len); free(buf); return ret; @@ -128,7 +129,7 @@ sord_node_from_serd_node(ReadState* state, const SerdNode* sn) case SERD_BLANK_ID: case SERD_ANON_BEGIN: case SERD_ANON: - return sord_new_blank(state->sord, sn->buf); + return sord_new_blank(state->world, sn->buf); } return NULL; } @@ -229,7 +230,9 @@ sord_read_file_handle(Sord sord, base_uri_n_bytes - 1, // FIXME: UTF-8 base_uri_str }; - ReadState state = { NULL, env, graph, base_uri_node, base_uri, sord }; + ReadState state = { NULL, env, graph, + base_uri_node, base_uri, + sord_get_world(sord), sord }; state.reader = serd_reader_new( SERD_TURTLE, &state, |