aboutsummaryrefslogtreecommitdiffstats
path: root/src/writer.c
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2011-01-21 06:28:23 +0000
committerDavid Robillard <d@drobilla.net>2011-01-21 06:28:23 +0000
commitff9510dc36fb3d6c5a85e3f4d41220c59b26ee93 (patch)
treee5d64ae74cf195a8d1783d8dfe9b268369c5036e /src/writer.c
parentc5a3f407cb46797f8b122d2f3843d382760a2c3b (diff)
downloadserd-ff9510dc36fb3d6c5a85e3f4d41220c59b26ee93.tar.gz
serd-ff9510dc36fb3d6c5a85e3f4d41220c59b26ee93.tar.bz2
serd-ff9510dc36fb3d6c5a85e3f4d41220c59b26ee93.zip
Preliminary support for writing abbreviated Turtle.
git-svn-id: http://svn.drobilla.net/serd/trunk@22 490d8e77-9747-427b-9fa3-0b8f29cee8a0
Diffstat (limited to 'src/writer.c')
-rw-r--r--src/writer.c227
1 files changed, 200 insertions, 27 deletions
diff --git a/src/writer.c b/src/writer.c
index ef10594b..684f79e2 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -21,16 +21,48 @@
#include "serd/serd.h"
+typedef bool (*StatementWriter)(SerdWriter writer,
+ const SerdString* graph,
+ const SerdString* subject,
+ SerdNodeType subject_type,
+ const SerdString* predicate,
+ SerdNodeType predicate_type,
+ const SerdString* object,
+ SerdNodeType object_type,
+ const SerdString* object_datatype,
+ const SerdString* object_lang);
+
+typedef bool (*NodeWriter)(SerdWriter writer,
+ SerdNodeType type,
+ const SerdString* str,
+ const SerdString* datatype,
+ const SerdString* lang);
+
struct SerdWriterImpl {
- SerdSyntax syntax;
- SerdNamespaces ns;
- SerdURI base_uri;
- SerdSink sink;
- void* stream;
+ SerdSyntax syntax;
+ SerdStyle style;
+ SerdNamespaces ns;
+ SerdURI base_uri;
+ SerdSink sink;
+ void* stream;
+ StatementWriter write_statement;
+ NodeWriter write_node;
+ const SerdString* prev_g;
+ const SerdString* prev_s;
+ const SerdString* prev_p;
+ const SerdString* prev_o;
+ unsigned indent;
};
+typedef enum {
+ WRITE_NORMAL,
+ WRITE_URI,
+ WRITE_STRING
+} WriteContext;
+
static bool
-serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const uint8_t esc)
+write_text(SerdWriter writer, WriteContext ctx,
+ const uint8_t* utf8, size_t n_bytes, uint8_t terminator)
{
char escape[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
for (size_t i = 0; i < n_bytes;) {
@@ -40,12 +72,16 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u
case '\n': writer->sink("\\n", 2, writer->stream); continue;
case '\r': writer->sink("\\r", 2, writer->stream); continue;
case '\t': writer->sink("\\t", 2, writer->stream); continue;
- case '"': if (esc == '"') { writer->sink("\\\"", 2, writer->stream); continue; }
+ case '"':
+ if (terminator == '"') {
+ writer->sink("\\\"", 2, writer->stream);
+ continue;
+ } // else fall-through
default: break;
}
- if (in == esc) {
- sprintf(escape, "\\u%04X", esc);
+ if (in == terminator) {
+ sprintf(escape, "\\u%04X", terminator);
writer->sink(escape, 6, writer->stream);
continue;
}
@@ -79,6 +115,13 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u
return false;
}
+ if (!(writer->style & SERD_STYLE_ASCII)) {
+ // Write UTF-8 input directly to UTF-8 output
+ writer->sink(utf8, n_bytes, writer->stream);
+ i += n_bytes - 1;
+ continue;
+ }
+
#define READ_BYTE() do { \
assert(i < n_bytes); \
in = utf8[i++] & 0x3f; \
@@ -106,11 +149,11 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u
}
static bool
-serd_write_node(SerdWriter writer,
- SerdNodeType type,
- const SerdString* str,
- const SerdString* datatype,
- const SerdString* lang)
+write_node(SerdWriter writer,
+ SerdNodeType type,
+ const SerdString* str,
+ const SerdString* datatype,
+ const SerdString* lang)
{
const SerdURI* base_uri = &writer->base_uri;
SerdNamespaces ns = writer->ns;
@@ -123,14 +166,20 @@ serd_write_node(SerdWriter writer,
writer->sink(str->buf, str->n_bytes - 1, writer->stream);
break;
case QNAME:
- if (!serd_namespaces_expand(ns, str, &uri_prefix, &uri_suffix)) {
- fprintf(stderr, "error: undefined namespace prefix `%s'\n", str->buf);
- return false;
+ switch (writer->syntax) {
+ case SERD_NTRIPLES:
+ if (!serd_namespaces_expand(ns, str, &uri_prefix, &uri_suffix)) {
+ fprintf(stderr, "error: undefined namespace prefix `%s'\n", str->buf);
+ return false;
+ }
+ writer->sink("<", 1, writer->stream);
+ write_text(writer, WRITE_URI, uri_prefix.buf, uri_prefix.len, '>');
+ write_text(writer, WRITE_URI, uri_suffix.buf, uri_suffix.len, '>');
+ writer->sink(">", 1, writer->stream);
+ break;
+ case SERD_TURTLE:
+ writer->sink(str->buf, str->n_bytes - 1, writer->stream);
}
- writer->sink("<", 1, writer->stream);
- serd_write_ascii(writer, uri_prefix.buf, uri_prefix.len, '>');
- serd_write_ascii(writer, uri_suffix.buf, uri_suffix.len, '>');
- writer->sink(">", 1, writer->stream);
break;
case URI:
if (!serd_uri_string_has_scheme(str->buf)) {
@@ -146,27 +195,44 @@ serd_write_node(SerdWriter writer,
}
} else {
writer->sink("<", 1, writer->stream);
- serd_write_ascii(writer, str->buf, str->n_bytes - 1, '>');
+ write_text(writer, WRITE_URI, str->buf, str->n_bytes - 1, '>');
writer->sink(">", 1, writer->stream);
return true;
}
return false;
case LITERAL:
writer->sink("\"", 1, writer->stream);
- serd_write_ascii(writer, str->buf, str->n_bytes - 1, '"');
+ write_text(writer, WRITE_STRING, str->buf, str->n_bytes - 1, '"');
writer->sink("\"", 1, writer->stream);
if (lang) {
writer->sink("@", 1, writer->stream);
writer->sink(lang->buf, lang->n_bytes - 1, writer->stream);
} else if (datatype) {
writer->sink("^^", 2, writer->stream);
- serd_write_node(writer, URI, datatype, NULL, NULL);
+ write_node(writer, URI, datatype, NULL, NULL);
}
break;
}
return true;
}
+static void
+serd_writer_write_delim(SerdWriter writer, const uint8_t delim)
+{
+ switch (delim) {
+ case 0:
+ case '\n':
+ break;
+ default:
+ writer->sink(" ", 1, writer->stream);
+ writer->sink(&delim, 1, writer->stream);
+ }
+ writer->sink("\n", 1, writer->stream);
+ for (unsigned i = 0; i < writer->indent; ++i) {
+ writer->sink("\t", 1, writer->stream);
+ }
+}
+
SERD_API
bool
serd_writer_write_statement(SerdWriter writer,
@@ -180,18 +246,98 @@ serd_writer_write_statement(SerdWriter writer,
const SerdString* object_datatype,
const SerdString* object_lang)
{
- serd_write_node(writer, subject_type, subject, NULL, NULL);
+ return writer->write_statement(writer,
+ graph,
+ subject, subject_type,
+ predicate, predicate_type,
+ object, object_type, object_datatype, object_lang);
+}
+
+static bool
+serd_writer_write_statement_abbrev(SerdWriter writer,
+ const SerdString* graph,
+ const SerdString* subject,
+ SerdNodeType subject_type,
+ const SerdString* predicate,
+ SerdNodeType predicate_type,
+ const SerdString* object,
+ SerdNodeType object_type,
+ const SerdString* object_datatype,
+ const SerdString* object_lang)
+{
+ assert(subject && predicate && object);
+ if (subject == writer->prev_s) {
+ if (predicate == writer->prev_p) {
+ ++writer->indent;
+ serd_writer_write_delim(writer, ',');
+ write_node(writer, object_type, object, object_datatype, object_lang);
+ --writer->indent;
+ } else {
+ serd_writer_write_delim(writer, ';');
+ write_node(writer, predicate_type, predicate, NULL, NULL);
+ writer->sink(" ", 1, writer->stream);
+ write_node(writer, object_type, object, object_datatype, object_lang);
+ }
+ } else {
+ if (writer->prev_s) {
+ --writer->indent;
+ serd_writer_write_delim(writer, '.');
+ serd_writer_write_delim(writer, '\n');
+ }
+ write_node(writer, subject_type, subject, NULL, NULL);
+ ++writer->indent;
+ serd_writer_write_delim(writer, 0);
+
+ writer->sink(" ", 1, writer->stream);
+ write_node(writer, predicate_type, predicate, NULL, NULL);
+ writer->sink(" ", 1, writer->stream);
+
+ write_node(writer, object_type, object, object_datatype, object_lang);
+ }
+
+ writer->prev_g = graph;
+ writer->prev_s = subject;
+ writer->prev_p = predicate;
+ writer->prev_o = object;
+ return true;
+}
+
+SERD_API
+bool
+serd_writer_write_statement_flat(SerdWriter writer,
+ const SerdString* graph,
+ const SerdString* subject,
+ SerdNodeType subject_type,
+ const SerdString* predicate,
+ SerdNodeType predicate_type,
+ const SerdString* object,
+ SerdNodeType object_type,
+ const SerdString* object_datatype,
+ const SerdString* object_lang)
+{
+ assert(subject && predicate && object);
+ write_node(writer, subject_type, subject, NULL, NULL);
writer->sink(" ", 1, writer->stream);
- serd_write_node(writer, predicate_type, predicate, NULL, NULL);
+ write_node(writer, predicate_type, predicate, NULL, NULL);
writer->sink(" ", 1, writer->stream);
- serd_write_node(writer, object_type, object, object_datatype, object_lang);
+ write_node(writer, object_type, object, object_datatype, object_lang);
writer->sink(" .\n", 3, writer->stream);
return true;
}
SERD_API
+void
+serd_writer_finish(SerdWriter writer)
+{
+ if (writer->prev_s) {
+ writer->sink(" .\n", 3, writer->stream);
+ }
+}
+
+SERD_API
SerdWriter
serd_writer_new(SerdSyntax syntax,
+ SerdStyle style,
SerdNamespaces ns,
const SerdURI* base_uri,
SerdSink sink,
@@ -199,10 +345,22 @@ serd_writer_new(SerdSyntax syntax,
{
SerdWriter writer = malloc(sizeof(struct SerdWriterImpl));
writer->syntax = syntax;
+ writer->style = style;
writer->ns = ns;
writer->base_uri = *base_uri;
writer->sink = sink;
writer->stream = stream;
+ writer->prev_g = 0;
+ writer->prev_s = 0;
+ writer->prev_p = 0;
+ writer->prev_o = 0;
+ writer->indent = 0;
+ writer->write_node = write_node;
+ if ((style & SERD_STYLE_ABBREVIATED)) {
+ writer->write_statement = serd_writer_write_statement_abbrev;
+ } else {
+ writer->write_statement = serd_writer_write_statement_flat;
+ }
return writer;
}
@@ -216,6 +374,21 @@ serd_writer_set_base_uri(SerdWriter writer,
SERD_API
void
+serd_writer_set_prefix(SerdWriter writer,
+ const SerdString* name,
+ const SerdString* uri)
+{
+ if (writer->syntax != SERD_NTRIPLES) {
+ writer->sink("@prefix ", 8, writer->stream);
+ writer->sink(name->buf, name->n_bytes - 1, writer->stream);
+ writer->sink(": <", 3, writer->stream);
+ write_text(writer, WRITE_URI, uri->buf, uri->n_bytes - 1, '>');
+ writer->sink("> .\n", 4, writer->stream);
+ }
+}
+
+SERD_API
+void
serd_writer_free(SerdWriter writer)
{
SerdWriter const me = (SerdWriter)writer;