From 5bc02bf31c689fe4a3758c8b484d220d4de33992 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Thu, 3 Feb 2011 04:43:37 +0000 Subject: Add SordTupleIndex enumeration for more readable Tuple code. Add sord_node_equals. Add sord_read_file_handle and add graph URI parameter to sord_read_file. Resolve relative URIs when parsing into a model. Read literal datatype or language when parsing into a model. Better debug printing. Add GOPS index by default. Add export_includes to library in wscript for building against sord locally. git-svn-id: http://svn.drobilla.net/sord/trunk@15 3d64ff67-21c5-427c-a301-fe4f08042e5a --- src/sord.c | 40 +++++++++++++++++------------ src/sord_internal.h | 34 ++++++++++++++++++++++++ src/sordi.c | 2 +- src/syntax.c | 74 +++++++++++++++++++++++++++++++++++++++++------------ 4 files changed, 116 insertions(+), 34 deletions(-) create mode 100644 src/sord_internal.h (limited to 'src') diff --git a/src/sord.c b/src/sord.c index 7f050e2..15d535a 100644 --- a/src/sord.c +++ b/src/sord.c @@ -37,7 +37,7 @@ #include #include "sord-config.h" -#include "sord/sord.h" +#include "sord_internal.h" #define SORD_LOG(prefix, ...) fprintf(stderr, "[Sord::" prefix "] " __VA_ARGS__) @@ -63,8 +63,13 @@ #define DEFAULT_ORDER SPO #define DEFAULT_GRAPH_ORDER GSPO -#define TUP_FMT "(%d %d %d %d)" -#define TUP_FMT_ARGS(t) ((t)[0]), ((t)[1]), ((t)[2]), ((t)[3]) +#define TUP_FMT "(%s %s %s %s)" +#define TUP_FMT_ELEM(e) ((e) ? sord_node_get_string(e) : "*") +#define TUP_FMT_ARGS(t) \ + TUP_FMT_ELEM((t)[0]), \ + TUP_FMT_ELEM((t)[1]), \ + TUP_FMT_ELEM((t)[2]), \ + TUP_FMT_ELEM((t)[3]) #define TUP_S 0 #define TUP_P 1 @@ -139,17 +144,6 @@ struct _SordIter { bool skip_graphs; ///< True iff iteration should ignore graphs }; -/** Node */ -struct _SordNode { - SordNodeType type; ///< SordNodeType - size_t n_bytes; ///< Length of data in bytes (including terminator) - SordCount refs; ///< Reference count (i.e. 
number of containing tuples) - void* user_data; ///< Opaque user data - SordNode datatype; ///< Literal data type (ID of a URI node, or 0) - const char* lang; ///< Literal language (interned string) - char* buf; ///< Value (string) -}; - static unsigned sord_literal_hash(const void* n) { @@ -168,7 +162,7 @@ sord_literal_equal(const void* a, const void* b) } static inline int -sord_node_compare(Sord sord, const SordNode a, const SordNode b) +sord_node_compare(const SordNode a, const SordNode b) { if (a->type != b->type) return a->type - b->type; @@ -185,6 +179,13 @@ sord_node_compare(Sord sord, const SordNode a, const SordNode b) return 0; } +bool +sord_node_equals(const SordNode a, const SordNode b) +{ + // FIXME: nodes are interned, this can be much faster + return sord_node_compare(a, b) == 0; +} + /** Compare two IDs (dereferencing if necessary). * The null ID, 0, is treated as a minimum (it is less than every other * possible ID, except itself). This allows it to be used as a wildcard @@ -200,7 +201,7 @@ sord_id_compare(Sord sord, const SordID a, const SordID b) } else { SordNode a_node = sord_node_load(sord, a); SordNode b_node = sord_node_load(sord, b); - const int ret = sord_node_compare(sord, a_node, b_node); + const int ret = sord_node_compare(a_node, b_node); return ret; } } @@ -601,8 +602,9 @@ sord_open(Sord sord) // Use default indexing, avoids O(n) in all cases sord->indices[SPO] = g_sequence_new(NULL); sord->indices[OPS] = g_sequence_new(NULL); - sord->indices[PSO] = g_sequence_new(NULL); + //sord->indices[PSO] = g_sequence_new(NULL); sord->indices[GSPO] = g_sequence_new(NULL); // XXX: default? do on demand? + sord->indices[GOPS] = g_sequence_new(NULL); // XXX: default? do on demand? 
} if (!sord->indices[DEFAULT_ORDER]) @@ -734,6 +736,10 @@ sord_find(Sord sord, const SordTuple pat) SordTuple search_key = { a, b, c, d }; GSequence* const db = sord->indices[index_order]; GSequenceIter* const cur = index_lower_bound(sord, db, search_key); + if (g_sequence_iter_is_end(cur)) { + SORD_FIND_LOG("No match found\n"); + return NULL; + } const SordID* const key = (const SordID*)g_sequence_get(cur); if (!key || ( (mode == RANGE || mode == SINGLE) && !sord_tuple_match_inline(search_key, key) )) { diff --git a/src/sord_internal.h b/src/sord_internal.h new file mode 100644 index 0000000..b7a3398 --- /dev/null +++ b/src/sord_internal.h @@ -0,0 +1,34 @@ +/* Sord, a lightweight RDF model library. + * Copyright 2010-2011 David Robillard + * + * Sord is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Sord is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef SORD_INTERNAL_H +#define SORD_INTERNAL_H + +#include "sord/sord.h" + +/** Node */ +struct _SordNode { + SordNodeType type; ///< SordNodeType + size_t n_bytes; ///< Length of data in bytes (including terminator) + SordCount refs; ///< Reference count (i.e. 
number of containing tuples) + void* user_data; ///< Opaque user data + SordNode datatype; ///< Literal data type (ID of a URI node, or 0) + const char* lang; ///< Literal language (interned string) + char* buf; ///< Value (string) +}; + +#endif // SORD_INTERNAL_H diff --git a/src/sordi.c b/src/sordi.c index 6260879..ac05a2a 100644 --- a/src/sordi.c +++ b/src/sordi.c @@ -121,7 +121,7 @@ main(int argc, char** argv) Sord sord = sord_new(); sord_open(sord); - bool success = sord_read_file(sord, input); + bool success = sord_read_file(sord, input, NULL); printf("loaded %u statements\n", sord_num_nodes(sord)); diff --git a/src/syntax.c b/src/syntax.c index b18052a..4a6567d 100644 --- a/src/syntax.c +++ b/src/syntax.c @@ -18,14 +18,17 @@ #include #include +#include + #include "serd/serd.h" #include "sord-config.h" -#include "sord/sord.h" +#include "sord_internal.h" typedef struct { SerdReader reader; SerdEnv env; + SordNode graph_uri_node; SerdNode base_uri_node; SerdURI base_uri; Sord sord; @@ -99,13 +102,24 @@ sord_node_from_serd_node(ReadState* state, const SerdNode* sn) case SERD_LITERAL: return sord_get_literal(state->sord, true, NULL, (const char*)sn->buf, NULL); - case SERD_URI: - return sord_get_uri_counted(state->sord, true, - (const char*)sn->buf, sn->n_chars); + case SERD_URI: { + SerdURI uri; + if (!serd_uri_parse(sn->buf, &uri)) { + return NULL; + } + SerdURI abs_uri; + if (!serd_uri_resolve(&uri, &state->base_uri, &abs_uri)) { + return false; + } + SerdURI ignored; + SerdNode abs_uri_node = serd_node_new_uri(&abs_uri, &ignored); + return sord_get_uri(state->sord, true, (const char*)abs_uri_node.buf); + } case SERD_CURIE: { SerdChunk uri_prefix; SerdChunk uri_suffix; if (!serd_env_expand(state->env, sn, &uri_prefix, &uri_suffix)) { + fprintf(stderr, "ERROR: failed to expand qname `%s'\n", sn->buf); return NULL; } const size_t uri_len = uri_prefix.len + uri_suffix.len; @@ -138,11 +152,25 @@ event_statement(void* handle, SordTuple tup; tup[0] = 
sord_node_from_serd_node(state, subject); tup[1] = sord_node_from_serd_node(state, predicate); - tup[2] = sord_node_from_serd_node(state, object); - tup[3] = (graph && graph->buf) - ? sord_node_from_serd_node(state, graph) - : NULL; - + + SordNode object_node = sord_node_from_serd_node(state, object); + + if (object_datatype) { + object_node->datatype = sord_node_from_serd_node(state, object_datatype); + } + if (object_lang) { + object_node->lang = g_intern_string((const char*)object_lang->buf); + } + tup[2] = object_node; + + if (state->graph_uri_node) { + tup[3] = state->graph_uri_node; + } else { + tup[3] = (graph && graph->buf) + ? sord_node_from_serd_node(state, graph) + : NULL; + } + sord_add(state->sord, tup); return true; @@ -150,7 +178,9 @@ event_statement(void* handle, SORD_API bool -sord_read_file(Sord sord, const uint8_t* input) +sord_read_file(Sord sord, + const uint8_t* input, + const SordNode graph) { const uint8_t* filename = NULL; if (serd_uri_string_has_scheme(input)) { @@ -167,14 +197,27 @@ sord_read_file(Sord sord, const uint8_t* input) filename = input; } - FILE* in_fd = fopen((const char*)input, "r"); + FILE* in_fd = fopen((const char*)filename, "r"); if (!in_fd) { - fprintf(stderr, "failed to open file %s\n", input); + fprintf(stderr, "failed to open file %s\n", filename); return 1; } + const bool success = sord_read_file_handle(sord, in_fd, input, graph); + + fclose(in_fd); + return success; +} + +SORD_API +bool +sord_read_file_handle(Sord sord, + FILE* fd, + const uint8_t* base_uri_str_in, + const SordNode graph) +{ size_t base_uri_n_bytes = 0; - uint8_t* base_uri_str = copy_string(input, &base_uri_n_bytes); + uint8_t* base_uri_str = copy_string(base_uri_str_in, &base_uri_n_bytes); SerdURI base_uri; if (!serd_uri_parse(base_uri_str, &base_uri)) { fprintf(stderr, "invalid base URI `%s'\n", base_uri_str); @@ -187,18 +230,17 @@ sord_read_file(Sord sord, const uint8_t* input) base_uri_n_bytes - 1, // FIXME: UTF-8 base_uri_str }; - ReadState 
state = { NULL, env, base_uri_node, base_uri, sord }; + ReadState state = { NULL, env, graph, base_uri_node, base_uri, sord }; state.reader = serd_reader_new( SERD_TURTLE, &state, event_base, event_prefix, event_statement, NULL); - const bool success = serd_reader_read_file(state.reader, in_fd, input); + const bool success = serd_reader_read_file(state.reader, fd, base_uri_str); serd_reader_free(state.reader); serd_env_free(state.env); serd_node_free(&state.base_uri_node); - fclose(in_fd); return success; } -- cgit v1.2.1