summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2011-05-12 22:50:07 +0000
committer David Robillard <d@drobilla.net> 2011-05-12 22:50:07 +0000
commit 22f5f5c7e32b043433103edb404bfbe43effa15d (patch)
tree 2bb579f0af8415d0fb78eef788ba1714079f3824 /src
parent d1a185c160bf8767e9001a6357f46ac6cfc6f94d (diff)
download sord-22f5f5c7e32b043433103edb404bfbe43effa15d.tar.gz
sord-22f5f5c7e32b043433103edb404bfbe43effa15d.tar.bz2
sord-22f5f5c7e32b043433103edb404bfbe43effa15d.zip
Add base_uri parameter to sord_read_file.
Add sord_write_writer. Use command line base URI in sordi if given. Use correct output style options for output syntax in sordi. Use sord_write_writer in sordi instead of manual writing code. Abbreviate serialised model output for Turtle. Preserve UTF-8 length information for nodes from Serd. Use string lengths not including terminator (match new Serd). Add test suite.
git-svn-id: http://svn.drobilla.net/sord/trunk@111 3d64ff67-21c5-427c-a301-fe4f08042e5a
Diffstat (limited to 'src')
-rw-r--r-- src/sord.c 29
-rw-r--r-- src/sordi.c 79
-rw-r--r-- src/syntax.c 99
3 files changed, 121 insertions, 86 deletions
diff --git a/src/sord.c b/src/sord.c
index d82eebc..4bd191a 100644
--- a/src/sord.c
+++ b/src/sord.c
@@ -916,7 +916,8 @@ sord_add_node(SordWorld* world, SordNode* node)
}
static SordNode*
-sord_new_uri_counted(SordWorld* world, const uint8_t* str, size_t str_len)
+sord_new_uri_counted(SordWorld* world, const uint8_t* str,
+ size_t n_bytes, size_t n_chars)
{
SordNode* node = sord_lookup_name(world, str);
if (node) {
@@ -924,7 +925,7 @@ sord_new_uri_counted(SordWorld* world, const uint8_t* str, size_t str_len)
return node;
}
- node = sord_new_node(SERD_URI, str, str_len + 1, str_len, 0, 0, 0);
+ node = sord_new_node(SERD_URI, str, n_bytes, n_chars, 0, 0, 0);
assert(!g_hash_table_lookup(world->names, node->node.buf));
g_hash_table_insert(world->names, (char*)node->node.buf, node);
sord_add_node(world, node);
@@ -934,11 +935,13 @@ sord_new_uri_counted(SordWorld* world, const uint8_t* str, size_t str_len)
SordNode*
sord_new_uri(SordWorld* world, const uint8_t* str)
{
- return sord_new_uri_counted(world, str, strlen((const char*)str));
+ const SerdNode node = serd_node_from_string(SERD_URI, str);
+ return sord_new_uri_counted(world, str, node.n_bytes, node.n_chars);
}
static SordNode*
-sord_new_blank_counted(SordWorld* world, const uint8_t* str, size_t str_len)
+sord_new_blank_counted(SordWorld* world, const uint8_t* str,
+ size_t n_bytes, size_t n_chars)
{
SordNode* node = sord_lookup_name(world, str);
if (node) {
@@ -946,7 +949,7 @@ sord_new_blank_counted(SordWorld* world, const uint8_t* str, size_t str_len)
return node;
}
- node = sord_new_node(SERD_BLANK_ID, str, str_len + 1, str_len, 0, 0, 0);
+ node = sord_new_node(SERD_BLANK_ID, str, n_bytes, n_chars, 0, 0, 0);
g_hash_table_insert(world->names, (char*)node->node.buf, node);
sord_add_node(world, node);
return node;
@@ -955,7 +958,8 @@ sord_new_blank_counted(SordWorld* world, const uint8_t* str, size_t str_len)
SordNode*
sord_new_blank(SordWorld* world, const uint8_t* str)
{
- return sord_new_blank_counted(world, str, strlen((const char*)str));
+ const SerdNode node = serd_node_from_string(SERD_URI, str);
+ return sord_new_blank_counted(world, str, node.n_bytes, node.n_chars);
}
static SordNode*
@@ -1022,8 +1026,10 @@ sord_node_from_serd_node(SordWorld* world,
SerdURI abs_uri;
SerdNode abs_uri_node = serd_node_new_uri_from_node(
sn, &base_uri, &abs_uri);
- SordNode* ret = sord_new_uri_counted(world, abs_uri_node.buf,
- abs_uri_node.n_bytes - 1);
+ SordNode* ret = sord_new_uri_counted(world,
+ abs_uri_node.buf,
+ abs_uri_node.n_bytes,
+ abs_uri_node.n_chars);
serd_node_free(&abs_uri_node);
return ret;
}
@@ -1040,14 +1046,15 @@ sord_node_from_serd_node(SordWorld* world,
memcpy(buf + uri_prefix.len, uri_suffix.buf, uri_suffix.len);
buf[uri_len] = '\0';
SordNode* ret = sord_new_uri_counted(
- world, buf, uri_prefix.len + uri_suffix.len);
+ world, buf, uri_prefix.len + uri_suffix.len,
+ uri_prefix.len + uri_suffix.len); // FIXME: UTF-8
free(buf);
return ret;
}
case SERD_BLANK_ID:
case SERD_ANON_BEGIN:
case SERD_ANON:
- return sord_new_blank_counted(world, sn->buf, sn->n_bytes - 1);
+ return sord_new_blank_counted(world, sn->buf, sn->n_bytes, sn->n_chars);
}
return NULL;
}
@@ -1055,7 +1062,7 @@ sord_node_from_serd_node(SordWorld* world,
const SerdNode*
sord_node_to_serd_node(const SordNode* node)
{
- return &node->node;
+ return node ? &node->node : &SERD_NODE_NULL;
}
void
diff --git a/src/sordi.c b/src/sordi.c
index 5410beb..493e4d1 100644
--- a/src/sordi.c
+++ b/src/sordi.c
@@ -118,25 +118,47 @@ main(int argc, char** argv)
const uint8_t* input = (const uint8_t*)argv[a++];
+ const uint8_t* base_uri_str = NULL;
+ SerdURI base_uri;
+ if (a < argc) { // Base URI given on command line
+ const uint8_t* const in_base_uri = (const uint8_t*)argv[a++];
+ if (serd_uri_parse((const uint8_t*)in_base_uri, &base_uri)) {
+ fprintf(stderr, "Invalid base URI <%s>\n", argv[2]);
+ return 1;
+ }
+ base_uri_str = in_base_uri;
+ } else { // Use input file URI
+ base_uri_str = input;
+ }
+
+ if (serd_uri_parse(base_uri_str, &base_uri)) {
+ fprintf(stderr, "Invalid base URI <%s>\n", base_uri_str);
+ return 1;
+ }
+
SordWorld* world = sord_world_new();
SordModel* sord = sord_new(world, SORD_SPO|SORD_OPS, false);
SerdEnv* env = serd_env_new();
- bool success = sord_read_file(sord, env, input, NULL, NULL);
+ bool success = sord_read_file(sord, env, input, base_uri_str, NULL, NULL);
- fprintf(stderr, "Loaded %zu statements\n", sord_num_nodes(world));
-
- SerdURI base_uri;
- if (serd_uri_parse(input, &base_uri)) {
- fprintf(stderr, "Bad input URI <%s>\n", input);
- return 1;
- }
+ fprintf(stderr, "Loaded %zu statements\n", sord_num_quads(sord));
SerdEnv* write_env = serd_env_new();
+ SerdNode base_uri_node = serd_node_from_string(SERD_URI, base_uri_str);
+ serd_env_set_base_uri(write_env, &base_uri_node);
+ serd_env_get_base_uri(write_env, &base_uri);
+
+ SerdStyle output_style = SERD_STYLE_RESOLVED;
+ if (output_syntax == SERD_NTRIPLES) {
+ output_style |= SERD_STYLE_ASCII;
+ } else {
+ output_style |= SERD_STYLE_CURIED | SERD_STYLE_ABBREVIATED;
+ }
SerdWriter* writer = serd_writer_new(
- SERD_TURTLE,
- SERD_STYLE_ABBREVIATED|SERD_STYLE_RESOLVED|SERD_STYLE_CURIED,
+ output_syntax,
+ output_style,
write_env, &base_uri, file_sink, stdout);
// Write @prefix directives
@@ -145,42 +167,7 @@ main(int argc, char** argv)
writer);
// Write statements
- SordQuad pat = { 0, 0, 0, 0 };
- SordIter* iter = sord_find(sord, pat);
- for (; !sord_iter_end(iter); sord_iter_next(iter)) {
- SordQuad tup;
- sord_iter_get(iter, tup);
- const SordNode* s = tup[SORD_SUBJECT];
- const SordNode* p = tup[SORD_PREDICATE];
- const SordNode* o = tup[SORD_OBJECT];
- SerdNode ss = serd_node_from_sord_node(s);
- SerdNode sp = serd_node_from_sord_node(p);
- SerdNode so = serd_node_from_sord_node(o);
- if (sord_node_is_inline_object(o)) {
- so.type = SERD_ANON_BEGIN;
- serd_writer_write_statement(
- writer, NULL, &ss, &sp, &so, NULL, NULL);
- so.type = SERD_ANON;
- SordQuad sub_pat = { o, 0, 0, 0 };
- SordIter* sub_iter = sord_find(sord, sub_pat);
- for (; !sord_iter_end(sub_iter); sord_iter_next(sub_iter)) {
- SordQuad sub_tup;
- sord_iter_get(sub_iter, sub_tup);
- const SordNode* sub_p = sub_tup[SORD_PREDICATE];
- const SordNode* sub_o = sub_tup[SORD_OBJECT];
- SerdNode sub_sp = serd_node_from_sord_node(sub_p);
- SerdNode sub_so = serd_node_from_sord_node(sub_o);
- serd_writer_write_statement(
- writer, NULL, &so, &sub_sp, &sub_so, NULL, NULL);
- }
- sord_iter_free(sub_iter);
- serd_writer_end_anon(writer, &so);
- } else if (!sord_node_is_inline_object(s)) {
- serd_writer_write_statement(
- writer, NULL, &ss, &sp, &so, NULL, NULL);
- }
- }
- sord_iter_free(iter);
+ sord_write_writer(sord, writer, NULL);
serd_writer_finish(writer);
serd_writer_free(writer);
diff --git a/src/syntax.c b/src/syntax.c
index eeb933e..73c5229 100644
--- a/src/syntax.c
+++ b/src/syntax.c
@@ -132,9 +132,14 @@ bool
sord_read_file(SordModel* model,
SerdEnv* env,
const uint8_t* uri,
+ const uint8_t* base_uri,
SordNode* graph,
const uint8_t* blank_prefix)
{
+ if (!base_uri) {
+ base_uri = uri;
+ }
+
const uint8_t* const path = sord_file_uri_to_path(uri);
if (!path) {
fprintf(stderr, "Unable to read non-file URI <%s>\n", uri);
@@ -148,7 +153,7 @@ sord_read_file(SordModel* model,
}
const bool ret = sord_read_file_handle(
- model, env, fd, uri, graph, blank_prefix);
+ model, env, fd, base_uri, graph, blank_prefix);
fclose(fd);
return ret;
}
@@ -257,36 +262,72 @@ file_sink(const void* buf, size_t len, void* stream)
}
static void
-sord_write(const SordModel* model,
- const SordNode* graph,
- SerdWriter* writer)
+write_statement(SordModel* sord, SerdWriter* writer, SordQuad tup,
+ const SordNode* anon_subject)
{
- const SerdNode* g = sord_node_to_serd_node(graph);
- for (SordIter* i = sord_begin(model); !sord_iter_end(i); sord_iter_next(i)) {
- SordQuad quad;
- sord_iter_get(i, quad);
-
- const SerdNode* s = sord_node_to_serd_node(quad[SORD_SUBJECT]);
- const SerdNode* p = sord_node_to_serd_node(quad[SORD_PREDICATE]);
- const SerdNode* o = sord_node_to_serd_node(quad[SORD_OBJECT]);
- const SerdNode* d = sord_node_to_serd_node(
- sord_node_get_datatype(quad[SORD_OBJECT]));
-
- const char* lang_str = sord_node_get_language(quad[SORD_OBJECT]);
- size_t lang_len = lang_str ? strlen(lang_str) : 0;
-
- SerdNode language = SERD_NODE_NULL;
- if (lang_str) {
- language.type = SERD_LITERAL;
- language.n_bytes = lang_len + 1;
- language.n_chars = lang_len;
- language.buf = (const uint8_t*)lang_str;
- };
-
- serd_writer_write_statement(writer, g, s, p, o, d, &language);
+ const SordNode* s = tup[SORD_SUBJECT];
+ const SordNode* p = tup[SORD_PREDICATE];
+ const SordNode* o = tup[SORD_OBJECT];
+ const SordNode* d = sord_node_get_datatype(o);
+ const SerdNode* ss = sord_node_to_serd_node(s);
+ const SerdNode* sp = sord_node_to_serd_node(p);
+ const SerdNode* so = sord_node_to_serd_node(o);
+ const SerdNode* sd = sord_node_to_serd_node(d);
+
+ const char* lang_str = sord_node_get_language(o);
+ size_t lang_len = lang_str ? strlen(lang_str) : 0;
+ SerdNode language = SERD_NODE_NULL;
+ if (lang_str) {
+ language.type = SERD_LITERAL;
+ language.n_bytes = lang_len;
+ language.n_chars = lang_len;
+ language.buf = (const uint8_t*)lang_str;
+ };
+
+ SerdNode subject = *ss;
+ if (anon_subject) {
+ assert(s == anon_subject);
+ subject.type = SERD_ANON;
+ } else if (sord_node_is_inline_object(s)) {
+ return;
+ }
+
+ if (sord_node_is_inline_object(o)) {
+ SerdNode anon = *so;
+ anon.type = SERD_ANON_BEGIN;
+ serd_writer_write_statement(
+ writer, NULL, &subject, sp, &anon, sd, &language);
+ SordQuad sub_pat = { o, 0, 0, 0 };
+ SordIter* sub_iter = sord_find(sord, sub_pat);
+ for (; !sord_iter_end(sub_iter); sord_iter_next(sub_iter)) {
+ SordQuad sub_tup;
+ sord_iter_get(sub_iter, sub_tup);
+ write_statement(sord, writer, sub_tup, o);
+ }
+ sord_iter_free(sub_iter);
+ serd_writer_end_anon(writer, so);
+ } else if (!sord_node_is_inline_object(s) || s == anon_subject) {
+ serd_writer_write_statement(
+ writer, NULL, &subject, sp, so, sd, &language);
}
}
+bool
+sord_write_writer(SordModel* model,
+ SerdWriter* writer,
+ SordNode* graph)
+{
+ SordQuad pat = { 0, 0, 0, graph };
+ SordIter* iter = sord_find(model, pat);
+ for (; !sord_iter_end(iter); sord_iter_next(iter)) {
+ SordQuad tup;
+ sord_iter_get(iter, tup);
+ write_statement(model, writer, tup, NULL);
+ }
+ sord_iter_free(iter);
+ return true;
+}
+
static SerdWriter*
make_writer(SerdEnv* env,
const uint8_t* base_uri_str_in,
@@ -324,7 +365,7 @@ sord_write_file_handle(SordModel* model,
const uint8_t* blank_prefix)
{
SerdWriter* writer = make_writer(env, base_uri_str_in, file_sink, fd);
- sord_write(model, graph, writer);
+ sord_write_writer(model, writer, graph);
serd_writer_free(writer);
return true;
}
@@ -352,7 +393,7 @@ sord_write_string(SordModel* model,
{
struct SerdBuffer buf = { NULL, 0 };
SerdWriter* writer = make_writer(env, base_uri, string_sink, &buf);
- sord_write(model, NULL, writer);
+ sord_write_writer(model, writer, NULL);
serd_writer_free(writer);
string_sink("", 1, &buf);
return buf.buf;