From 22f5f5c7e32b043433103edb404bfbe43effa15d Mon Sep 17 00:00:00 2001 From: David Robillard Date: Thu, 12 May 2011 22:50:07 +0000 Subject: Add base_uri parameter to sord_read_file. Add sord_write_writer. Use command line base URI in sordi if given. Use correct output style options for output syntax in sordi. Use sord_write_writer in sordi instead of manual writing code. Abbreviate serialised model output for Turtle. Preserve UTF-8 length information for nodes from Serd. Use string lengths not including terminator (match new Serd). Add test suite. git-svn-id: http://svn.drobilla.net/sord/trunk@111 3d64ff67-21c5-427c-a301-fe4f08042e5a --- src/sord.c | 29 +++++++++++------- src/sordi.c | 79 ++++++++++++++++++++---------------------------- src/syntax.c | 99 ++++++++++++++++++++++++++++++++++++++++++------------------ 3 files changed, 121 insertions(+), 86 deletions(-) (limited to 'src') diff --git a/src/sord.c b/src/sord.c index d82eebc..4bd191a 100644 --- a/src/sord.c +++ b/src/sord.c @@ -916,7 +916,8 @@ sord_add_node(SordWorld* world, SordNode* node) } static SordNode* -sord_new_uri_counted(SordWorld* world, const uint8_t* str, size_t str_len) +sord_new_uri_counted(SordWorld* world, const uint8_t* str, + size_t n_bytes, size_t n_chars) { SordNode* node = sord_lookup_name(world, str); if (node) { @@ -924,7 +925,7 @@ sord_new_uri_counted(SordWorld* world, const uint8_t* str, size_t str_len) return node; } - node = sord_new_node(SERD_URI, str, str_len + 1, str_len, 0, 0, 0); + node = sord_new_node(SERD_URI, str, n_bytes, n_chars, 0, 0, 0); assert(!g_hash_table_lookup(world->names, node->node.buf)); g_hash_table_insert(world->names, (char*)node->node.buf, node); sord_add_node(world, node); @@ -934,11 +935,13 @@ sord_new_uri_counted(SordWorld* world, const uint8_t* str, size_t str_len) SordNode* sord_new_uri(SordWorld* world, const uint8_t* str) { - return sord_new_uri_counted(world, str, strlen((const char*)str)); + const SerdNode node = serd_node_from_string(SERD_URI, str); + return sord_new_uri_counted(world, str, node.n_bytes, node.n_chars); } static SordNode* -sord_new_blank_counted(SordWorld* world, const uint8_t* str, size_t str_len) +sord_new_blank_counted(SordWorld* world, const uint8_t* str, + size_t n_bytes, size_t n_chars) { SordNode* node = sord_lookup_name(world, str); if (node) { @@ -946,7 +949,7 @@ sord_new_blank_counted(SordWorld* world, const uint8_t* str, size_t str_len) return node; } - node = sord_new_node(SERD_BLANK_ID, str, str_len + 1, str_len, 0, 0, 0); + node = sord_new_node(SERD_BLANK_ID, str, n_bytes, n_chars, 0, 0, 0); g_hash_table_insert(world->names, (char*)node->node.buf, node); sord_add_node(world, node); return node; @@ -955,7 +958,8 @@ sord_new_blank_counted(SordWorld* world, const uint8_t* str, size_t str_len) SordNode* sord_new_blank(SordWorld* world, const uint8_t* str) { - return sord_new_blank_counted(world, str, strlen((const char*)str)); + const SerdNode node = serd_node_from_string(SERD_URI, str); + return sord_new_blank_counted(world, str, node.n_bytes, node.n_chars); } static SordNode* @@ -1022,8 +1026,10 @@ sord_node_from_serd_node(SordWorld* world, SerdURI abs_uri; SerdNode abs_uri_node = serd_node_new_uri_from_node( sn, &base_uri, &abs_uri); - SordNode* ret = sord_new_uri_counted(world, abs_uri_node.buf, - abs_uri_node.n_bytes - 1); + SordNode* ret = sord_new_uri_counted(world, + abs_uri_node.buf, + abs_uri_node.n_bytes, + abs_uri_node.n_chars); serd_node_free(&abs_uri_node); return ret; } @@ -1040,14 +1046,15 @@ sord_node_from_serd_node(SordWorld* world, memcpy(buf + uri_prefix.len, uri_suffix.buf, uri_suffix.len); buf[uri_len] = '\0'; SordNode* ret = sord_new_uri_counted( - world, buf, uri_prefix.len + uri_suffix.len); + world, buf, uri_prefix.len + uri_suffix.len, + uri_prefix.len + uri_suffix.len); // FIXME: UTF-8 free(buf); return ret; } case SERD_BLANK_ID: case SERD_ANON_BEGIN: case SERD_ANON: - return sord_new_blank_counted(world, sn->buf, sn->n_bytes - 1); + return sord_new_blank_counted(world, sn->buf, sn->n_bytes, sn->n_chars); } return NULL; } @@ -1055,7 +1062,7 @@ sord_node_from_serd_node(SordWorld* world, const SerdNode* sord_node_to_serd_node(const SordNode* node) { - return &node->node; + return node ? &node->node : &SERD_NODE_NULL; } void diff --git a/src/sordi.c b/src/sordi.c index 5410beb..493e4d1 100644 --- a/src/sordi.c +++ b/src/sordi.c @@ -118,25 +118,47 @@ main(int argc, char** argv) const uint8_t* input = (const uint8_t*)argv[a++]; + const uint8_t* base_uri_str = NULL; + SerdURI base_uri; + if (a < argc) { // Base URI given on command line + const uint8_t* const in_base_uri = (const uint8_t*)argv[a++]; + if (serd_uri_parse((const uint8_t*)in_base_uri, &base_uri)) { + fprintf(stderr, "Invalid base URI <%s>\n", argv[2]); + return 1; + } + base_uri_str = in_base_uri; + } else { // Use input file URI + base_uri_str = input; + } + + if (serd_uri_parse(base_uri_str, &base_uri)) { + fprintf(stderr, "Invalid base URI <%s>\n", base_uri_str); + return 1; + } + SordWorld* world = sord_world_new(); SordModel* sord = sord_new(world, SORD_SPO|SORD_OPS, false); SerdEnv* env = serd_env_new(); - bool success = sord_read_file(sord, env, input, NULL, NULL); + bool success = sord_read_file(sord, env, input, base_uri_str, NULL, NULL); - fprintf(stderr, "Loaded %zu statements\n", sord_num_nodes(world)); - - SerdURI base_uri; - if (serd_uri_parse(input, &base_uri)) { - fprintf(stderr, "Bad input URI <%s>\n", input); - return 1; - } + fprintf(stderr, "Loaded %zu statements\n", sord_num_quads(sord)); SerdEnv* write_env = serd_env_new(); + SerdNode base_uri_node = serd_node_from_string(SERD_URI, base_uri_str); + serd_env_set_base_uri(write_env, &base_uri_node); + serd_env_get_base_uri(write_env, &base_uri); + + SerdStyle output_style = SERD_STYLE_RESOLVED; + if (output_syntax == SERD_NTRIPLES) { + output_style |= SERD_STYLE_ASCII; + } else { + output_style |= SERD_STYLE_CURIED | SERD_STYLE_ABBREVIATED; + } SerdWriter* writer = serd_writer_new( - SERD_TURTLE, - SERD_STYLE_ABBREVIATED|SERD_STYLE_RESOLVED|SERD_STYLE_CURIED, + output_syntax, + output_style, write_env, &base_uri, file_sink, stdout); // Write @prefix directives @@ -145,42 +167,7 @@ main(int argc, char** argv) writer); // Write statements - SordQuad pat = { 0, 0, 0, 0 }; - SordIter* iter = sord_find(sord, pat); - for (; !sord_iter_end(iter); sord_iter_next(iter)) { - SordQuad tup; - sord_iter_get(iter, tup); - const SordNode* s = tup[SORD_SUBJECT]; - const SordNode* p = tup[SORD_PREDICATE]; - const SordNode* o = tup[SORD_OBJECT]; - SerdNode ss = serd_node_from_sord_node(s); - SerdNode sp = serd_node_from_sord_node(p); - SerdNode so = serd_node_from_sord_node(o); - if (sord_node_is_inline_object(o)) { - so.type = SERD_ANON_BEGIN; - serd_writer_write_statement( - writer, NULL, &ss, &sp, &so, NULL, NULL); - so.type = SERD_ANON; - SordQuad sub_pat = { o, 0, 0, 0 }; - SordIter* sub_iter = sord_find(sord, sub_pat); - for (; !sord_iter_end(sub_iter); sord_iter_next(sub_iter)) { - SordQuad sub_tup; - sord_iter_get(sub_iter, sub_tup); - const SordNode* sub_p = sub_tup[SORD_PREDICATE]; - const SordNode* sub_o = sub_tup[SORD_OBJECT]; - SerdNode sub_sp = serd_node_from_sord_node(sub_p); - SerdNode sub_so = serd_node_from_sord_node(sub_o); - serd_writer_write_statement( - writer, NULL, &so, &sub_sp, &sub_so, NULL, NULL); - } - sord_iter_free(sub_iter); - serd_writer_end_anon(writer, &so); - } else if (!sord_node_is_inline_object(s)) { - serd_writer_write_statement( - writer, NULL, &ss, &sp, &so, NULL, NULL); - } - } - sord_iter_free(iter); + sord_write_writer(sord, writer, NULL); serd_writer_finish(writer); serd_writer_free(writer); diff --git a/src/syntax.c b/src/syntax.c index eeb933e..73c5229 100644 --- a/src/syntax.c +++ b/src/syntax.c @@ -132,9 +132,14 @@ bool sord_read_file(SordModel* model, SerdEnv* env, const uint8_t* uri, + const uint8_t* base_uri, SordNode* graph, const uint8_t* blank_prefix) { + if (!base_uri) { + base_uri = uri; + } + const uint8_t* const path = sord_file_uri_to_path(uri); if (!path) { fprintf(stderr, "Unable to read non-file URI <%s>\n", uri); @@ -148,7 +153,7 @@ sord_read_file(SordModel* model, } const bool ret = sord_read_file_handle( - model, env, fd, uri, graph, blank_prefix); + model, env, fd, base_uri, graph, blank_prefix); fclose(fd); return ret; } @@ -257,36 +262,72 @@ file_sink(const void* buf, size_t len, void* stream) } static void -sord_write(const SordModel* model, - const SordNode* graph, - SerdWriter* writer) +write_statement(SordModel* sord, SerdWriter* writer, SordQuad tup, + const SordNode* anon_subject) { - const SerdNode* g = sord_node_to_serd_node(graph); - for (SordIter* i = sord_begin(model); !sord_iter_end(i); sord_iter_next(i)) { - SordQuad quad; - sord_iter_get(i, quad); - - const SerdNode* s = sord_node_to_serd_node(quad[SORD_SUBJECT]); - const SerdNode* p = sord_node_to_serd_node(quad[SORD_PREDICATE]); - const SerdNode* o = sord_node_to_serd_node(quad[SORD_OBJECT]); - const SerdNode* d = sord_node_to_serd_node( - sord_node_get_datatype(quad[SORD_OBJECT])); - - const char* lang_str = sord_node_get_language(quad[SORD_OBJECT]); - size_t lang_len = lang_str ? strlen(lang_str) : 0; - - SerdNode language = SERD_NODE_NULL; - if (lang_str) { - language.type = SERD_LITERAL; - language.n_bytes = lang_len + 1; - language.n_chars = lang_len; - language.buf = (const uint8_t*)lang_str; - }; - - serd_writer_write_statement(writer, g, s, p, o, d, &language); + const SordNode* s = tup[SORD_SUBJECT]; + const SordNode* p = tup[SORD_PREDICATE]; + const SordNode* o = tup[SORD_OBJECT]; + const SordNode* d = sord_node_get_datatype(o); + const SerdNode* ss = sord_node_to_serd_node(s); + const SerdNode* sp = sord_node_to_serd_node(p); + const SerdNode* so = sord_node_to_serd_node(o); + const SerdNode* sd = sord_node_to_serd_node(d); + + const char* lang_str = sord_node_get_language(o); + size_t lang_len = lang_str ? strlen(lang_str) : 0; + SerdNode language = SERD_NODE_NULL; + if (lang_str) { + language.type = SERD_LITERAL; + language.n_bytes = lang_len; + language.n_chars = lang_len; + language.buf = (const uint8_t*)lang_str; + }; + + SerdNode subject = *ss; + if (anon_subject) { + assert(s == anon_subject); + subject.type = SERD_ANON; + } else if (sord_node_is_inline_object(s)) { + return; + } + + if (sord_node_is_inline_object(o)) { + SerdNode anon = *so; + anon.type = SERD_ANON_BEGIN; + serd_writer_write_statement( + writer, NULL, &subject, sp, &anon, sd, &language); + SordQuad sub_pat = { o, 0, 0, 0 }; + SordIter* sub_iter = sord_find(sord, sub_pat); + for (; !sord_iter_end(sub_iter); sord_iter_next(sub_iter)) { + SordQuad sub_tup; + sord_iter_get(sub_iter, sub_tup); + write_statement(sord, writer, sub_tup, o); + } + sord_iter_free(sub_iter); + serd_writer_end_anon(writer, so); + } else if (!sord_node_is_inline_object(s) || s == anon_subject) { + serd_writer_write_statement( + writer, NULL, &subject, sp, so, sd, &language); } } +bool +sord_write_writer(SordModel* model, + SerdWriter* writer, + SordNode* graph) +{ + SordQuad pat = { 0, 0, 0, graph }; + SordIter* iter = sord_find(model, pat); + for (; !sord_iter_end(iter); sord_iter_next(iter)) { + SordQuad tup; + sord_iter_get(iter, tup); + write_statement(model, writer, tup, NULL); + } + sord_iter_free(iter); + return true; +} + static SerdWriter* make_writer(SerdEnv* env, const uint8_t* base_uri_str_in, @@ -324,7 +365,7 @@ sord_write_file_handle(SordModel* model, const uint8_t* blank_prefix) { SerdWriter* writer = make_writer(env, base_uri_str_in, file_sink, fd); - sord_write(model, graph, writer); + sord_write_writer(model, writer, graph); serd_writer_free(writer); return true; } @@ -352,7 +393,7 @@ sord_write_string(SordModel* model, { struct SerdBuffer buf = { NULL, 0 }; SerdWriter* writer = make_writer(env, base_uri, string_sink, &buf); - sord_write(model, NULL, writer); + sord_write_writer(model, writer, NULL); serd_writer_free(writer); string_sink("", 1, &buf); return buf.buf; -- cgit v1.2.1