aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2011-01-21 06:28:23 +0000
committer David Robillard <d@drobilla.net> 2011-01-21 06:28:23 +0000
commit ff9510dc36fb3d6c5a85e3f4d41220c59b26ee93 (patch)
tree e5d64ae74cf195a8d1783d8dfe9b268369c5036e
parent c5a3f407cb46797f8b122d2f3843d382760a2c3b (diff)
download serd-ff9510dc36fb3d6c5a85e3f4d41220c59b26ee93.tar.gz
serd-ff9510dc36fb3d6c5a85e3f4d41220c59b26ee93.tar.bz2
serd-ff9510dc36fb3d6c5a85e3f4d41220c59b26ee93.zip
Preliminary support for writing abbreviated Turtle.
git-svn-id: http://svn.drobilla.net/serd/trunk@22 490d8e77-9747-427b-9fa3-0b8f29cee8a0
-rw-r--r-- serd/serd.h 21
-rw-r--r-- src/serdi.c 54
-rw-r--r-- src/writer.c 227
-rw-r--r-- wscript 36
4 files changed, 286 insertions, 52 deletions
diff --git a/serd/serd.h b/serd/serd.h
index 8efaab20..d350cf4c 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -244,10 +244,17 @@ serd_reader_free(SerdReader reader);
* @{
*/
+typedef enum {
+ SERD_STYLE_ABBREVIATED = 1,
+ SERD_STYLE_ASCII = 1 << 1,
+ SERD_STYLE_ESCAPE_
+} SerdStyle;
+
/** Create a new RDF writer. */
SERD_API
SerdWriter
serd_writer_new(SerdSyntax syntax,
+ SerdStyle style,
SerdNamespaces ns,
const SerdURI* base_uri,
SerdSink sink,
@@ -258,12 +265,19 @@ SERD_API
void
serd_writer_free(SerdWriter writer);
-/** Set the base URI of writer. */
+/** Set the current output base URI. */
SERD_API
void
serd_writer_set_base_uri(SerdWriter writer,
const SerdURI* uri);
+/** Set a namespace prefix (written as an @prefix directive unless the syntax is NTriples). */
+SERD_API
+void
+serd_writer_set_prefix(SerdWriter writer,
+ const SerdString* name,
+ const SerdString* uri);
+
/** Write a statement. */
SERD_API
bool
@@ -278,6 +292,11 @@ serd_writer_write_statement(SerdWriter writer,
const SerdString* object_datatype,
const SerdString* object_lang);
+/** Finish a write, terminating the last statement if one is still open. */
+SERD_API
+void
+serd_writer_finish(SerdWriter writer);
+
/** @} */
/** @} */
diff --git a/src/serdi.c b/src/serdi.c
index 5db96388..a688b41b 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -90,6 +90,8 @@ event_prefix(void* handle,
} else {
serd_namespaces_add(state->ns, name, uri_string);
}
+ serd_writer_set_prefix(state->writer, name, uri_string);
+
return true;
}
@@ -133,13 +135,39 @@ file_sink(const void* buf, size_t len, void* stream)
int
main(int argc, char** argv)
{
- if (argc != 2 && argc != 3) {
+ if (argc < 2) {
return print_usage(argv[0], true);
}
- const uint8_t* in_filename = (const uint8_t*)argv[1];
+ FILE* in_fd = NULL;
+ SerdSyntax output_syntax = SERD_NTRIPLES;
+
+ int a = 1;
+ for (; a < argc && argv[a][0] == '-'; ++a) {
+ if (argv[a][1] == '\0') {
+ in_fd = stdin;
+ break;
+ } else if (argv[a][1] == 'o') {
+ if (++a == argc) {
+ fprintf(stderr, "missing value for -i\n");
+ return 1;
+ }
+ if (!strcmp(argv[a], "turtle")) {
+ output_syntax = SERD_TURTLE;
+ } else if (!strcmp(argv[a], "ntriples")) {
+ output_syntax = SERD_NTRIPLES;
+ } else {
+ fprintf(stderr, "unknown output format `%s'\n", argv[a]);
+ }
+ } else {
+ fprintf(stderr, "unknown option `%s'\n", argv[a]);
+ return print_usage(argv[0], true);
+ }
+ }
+
+ const uint8_t* in_filename = (const uint8_t*)argv[a];
- if (serd_uri_string_has_scheme(in_filename)) {
+ if (!in_fd && serd_uri_string_has_scheme(in_filename)) {
// Input is an absolute URI, ensure it's a file: URI and chop scheme
if (strncmp((const char*)in_filename, "file:", 5)) {
fprintf(stderr, "unsupported URI scheme `%s'\n", in_filename);
@@ -166,8 +194,11 @@ main(int argc, char** argv)
serd_uri_parse(base_uri_str->buf, &base_uri);
- FILE* const in_fd = fopen((const char*)in_filename, "r");
- FILE* out_fd = stdout;
+ if (!in_fd) {
+ in_fd = fopen((const char*)in_filename, "r");
+ }
+
+ FILE* out_fd = stdout;
if (!in_fd) {
fprintf(stderr, "failed to open file %s\n", in_filename);
@@ -175,8 +206,14 @@ main(int argc, char** argv)
}
SerdNamespaces ns = serd_namespaces_new();
+
+ SerdStyle output_style = (output_syntax == SERD_NTRIPLES)
+ ? SERD_STYLE_ASCII
+ : SERD_STYLE_ABBREVIATED;
+
State state = {
- serd_writer_new(SERD_NTRIPLES, ns, &base_uri, file_sink, out_fd),
+ serd_writer_new(output_syntax, output_style,
+ ns, &base_uri, file_sink, out_fd),
ns, base_uri_str, base_uri
};
@@ -186,8 +223,11 @@ main(int argc, char** argv)
const bool success = serd_reader_read_file(reader, in_fd, in_filename);
serd_reader_free(reader);
fclose(in_fd);
- serd_namespaces_free(state.ns);
+
+ serd_writer_finish(state.writer);
serd_writer_free(state.writer);
+
+ serd_namespaces_free(state.ns);
serd_string_free(state.base_uri_str);
if (success) {
diff --git a/src/writer.c b/src/writer.c
index ef10594b..684f79e2 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -21,16 +21,48 @@
#include "serd/serd.h"
+typedef bool (*StatementWriter)(SerdWriter writer,
+ const SerdString* graph,
+ const SerdString* subject,
+ SerdNodeType subject_type,
+ const SerdString* predicate,
+ SerdNodeType predicate_type,
+ const SerdString* object,
+ SerdNodeType object_type,
+ const SerdString* object_datatype,
+ const SerdString* object_lang);
+
+typedef bool (*NodeWriter)(SerdWriter writer,
+ SerdNodeType type,
+ const SerdString* str,
+ const SerdString* datatype,
+ const SerdString* lang);
+
struct SerdWriterImpl {
- SerdSyntax syntax;
- SerdNamespaces ns;
- SerdURI base_uri;
- SerdSink sink;
- void* stream;
+ SerdSyntax syntax;
+ SerdStyle style;
+ SerdNamespaces ns;
+ SerdURI base_uri;
+ SerdSink sink;
+ void* stream;
+ StatementWriter write_statement;
+ NodeWriter write_node;
+ const SerdString* prev_g;
+ const SerdString* prev_s;
+ const SerdString* prev_p;
+ const SerdString* prev_o;
+ unsigned indent;
};
+typedef enum {
+ WRITE_NORMAL,
+ WRITE_URI,
+ WRITE_STRING
+} WriteContext;
+
static bool
-serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const uint8_t esc)
+write_text(SerdWriter writer, WriteContext ctx,
+ const uint8_t* utf8, size_t n_bytes, uint8_t terminator)
{
char escape[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
for (size_t i = 0; i < n_bytes;) {
@@ -40,12 +72,16 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u
case '\n': writer->sink("\\n", 2, writer->stream); continue;
case '\r': writer->sink("\\r", 2, writer->stream); continue;
case '\t': writer->sink("\\t", 2, writer->stream); continue;
- case '"': if (esc == '"') { writer->sink("\\\"", 2, writer->stream); continue; }
+ case '"':
+ if (terminator == '"') {
+ writer->sink("\\\"", 2, writer->stream);
+ continue;
+ } // else fall-through
default: break;
}
- if (in == esc) {
- sprintf(escape, "\\u%04X", esc);
+ if (in == terminator) {
+ sprintf(escape, "\\u%04X", terminator);
writer->sink(escape, 6, writer->stream);
continue;
}
@@ -79,6 +115,13 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u
return false;
}
+ if (!(writer->style & SERD_STYLE_ASCII)) {
+ // Write UTF-8 input directly to UTF-8 output
+ writer->sink(utf8, n_bytes, writer->stream);
+ i += n_bytes - 1;
+ continue;
+ }
+
#define READ_BYTE() do { \
assert(i < n_bytes); \
in = utf8[i++] & 0x3f; \
@@ -106,11 +149,11 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u
}
static bool
-serd_write_node(SerdWriter writer,
- SerdNodeType type,
- const SerdString* str,
- const SerdString* datatype,
- const SerdString* lang)
+write_node(SerdWriter writer,
+ SerdNodeType type,
+ const SerdString* str,
+ const SerdString* datatype,
+ const SerdString* lang)
{
const SerdURI* base_uri = &writer->base_uri;
SerdNamespaces ns = writer->ns;
@@ -123,14 +166,20 @@ serd_write_node(SerdWriter writer,
writer->sink(str->buf, str->n_bytes - 1, writer->stream);
break;
case QNAME:
- if (!serd_namespaces_expand(ns, str, &uri_prefix, &uri_suffix)) {
- fprintf(stderr, "error: undefined namespace prefix `%s'\n", str->buf);
- return false;
+ switch (writer->syntax) {
+ case SERD_NTRIPLES:
+ if (!serd_namespaces_expand(ns, str, &uri_prefix, &uri_suffix)) {
+ fprintf(stderr, "error: undefined namespace prefix `%s'\n", str->buf);
+ return false;
+ }
+ writer->sink("<", 1, writer->stream);
+ write_text(writer, WRITE_URI, uri_prefix.buf, uri_prefix.len, '>');
+ write_text(writer, WRITE_URI, uri_suffix.buf, uri_suffix.len, '>');
+ writer->sink(">", 1, writer->stream);
+ break;
+ case SERD_TURTLE:
+ writer->sink(str->buf, str->n_bytes - 1, writer->stream);
}
- writer->sink("<", 1, writer->stream);
- serd_write_ascii(writer, uri_prefix.buf, uri_prefix.len, '>');
- serd_write_ascii(writer, uri_suffix.buf, uri_suffix.len, '>');
- writer->sink(">", 1, writer->stream);
break;
case URI:
if (!serd_uri_string_has_scheme(str->buf)) {
@@ -146,27 +195,44 @@ serd_write_node(SerdWriter writer,
}
} else {
writer->sink("<", 1, writer->stream);
- serd_write_ascii(writer, str->buf, str->n_bytes - 1, '>');
+ write_text(writer, WRITE_URI, str->buf, str->n_bytes - 1, '>');
writer->sink(">", 1, writer->stream);
return true;
}
return false;
case LITERAL:
writer->sink("\"", 1, writer->stream);
- serd_write_ascii(writer, str->buf, str->n_bytes - 1, '"');
+ write_text(writer, WRITE_STRING, str->buf, str->n_bytes - 1, '"');
writer->sink("\"", 1, writer->stream);
if (lang) {
writer->sink("@", 1, writer->stream);
writer->sink(lang->buf, lang->n_bytes - 1, writer->stream);
} else if (datatype) {
writer->sink("^^", 2, writer->stream);
- serd_write_node(writer, URI, datatype, NULL, NULL);
+ write_node(writer, URI, datatype, NULL, NULL);
}
break;
}
return true;
}
+static void
+serd_writer_write_delim(SerdWriter writer, const uint8_t delim)
+{
+ switch (delim) {
+ case 0:
+ case '\n':
+ break;
+ default:
+ writer->sink(" ", 1, writer->stream);
+ writer->sink(&delim, 1, writer->stream);
+ }
+ writer->sink("\n", 1, writer->stream);
+ for (unsigned i = 0; i < writer->indent; ++i) {
+ writer->sink("\t", 1, writer->stream);
+ }
+}
+
SERD_API
bool
serd_writer_write_statement(SerdWriter writer,
@@ -180,18 +246,98 @@ serd_writer_write_statement(SerdWriter writer,
const SerdString* object_datatype,
const SerdString* object_lang)
{
- serd_write_node(writer, subject_type, subject, NULL, NULL);
+ return writer->write_statement(writer,
+ graph,
+ subject, subject_type,
+ predicate, predicate_type,
+ object, object_type, object_datatype, object_lang);
+}
+
+static bool
+serd_writer_write_statement_abbrev(SerdWriter writer,
+ const SerdString* graph,
+ const SerdString* subject,
+ SerdNodeType subject_type,
+ const SerdString* predicate,
+ SerdNodeType predicate_type,
+ const SerdString* object,
+ SerdNodeType object_type,
+ const SerdString* object_datatype,
+ const SerdString* object_lang)
+{
+ assert(subject && predicate && object);
+ if (subject == writer->prev_s) {
+ if (predicate == writer->prev_p) {
+ ++writer->indent;
+ serd_writer_write_delim(writer, ',');
+ write_node(writer, object_type, object, object_datatype, object_lang);
+ --writer->indent;
+ } else {
+ serd_writer_write_delim(writer, ';');
+ write_node(writer, predicate_type, predicate, NULL, NULL);
+ writer->sink(" ", 1, writer->stream);
+ write_node(writer, object_type, object, object_datatype, object_lang);
+ }
+ } else {
+ if (writer->prev_s) {
+ --writer->indent;
+ serd_writer_write_delim(writer, '.');
+ serd_writer_write_delim(writer, '\n');
+ }
+ write_node(writer, subject_type, subject, NULL, NULL);
+ ++writer->indent;
+ serd_writer_write_delim(writer, 0);
+
+ writer->sink(" ", 1, writer->stream);
+ write_node(writer, predicate_type, predicate, NULL, NULL);
+ writer->sink(" ", 1, writer->stream);
+
+ write_node(writer, object_type, object, object_datatype, object_lang);
+ }
+
+ writer->prev_g = graph;
+ writer->prev_s = subject;
+ writer->prev_p = predicate;
+ writer->prev_o = object;
+ return true;
+}
+
+SERD_API
+bool
+serd_writer_write_statement_flat(SerdWriter writer,
+ const SerdString* graph,
+ const SerdString* subject,
+ SerdNodeType subject_type,
+ const SerdString* predicate,
+ SerdNodeType predicate_type,
+ const SerdString* object,
+ SerdNodeType object_type,
+ const SerdString* object_datatype,
+ const SerdString* object_lang)
+{
+ assert(subject && predicate && object);
+ write_node(writer, subject_type, subject, NULL, NULL);
writer->sink(" ", 1, writer->stream);
- serd_write_node(writer, predicate_type, predicate, NULL, NULL);
+ write_node(writer, predicate_type, predicate, NULL, NULL);
writer->sink(" ", 1, writer->stream);
- serd_write_node(writer, object_type, object, object_datatype, object_lang);
+ write_node(writer, object_type, object, object_datatype, object_lang);
writer->sink(" .\n", 3, writer->stream);
return true;
}
SERD_API
+void
+serd_writer_finish(SerdWriter writer)
+{
+ if (writer->prev_s) {
+ writer->sink(" .\n", 3, writer->stream);
+ }
+}
+
+SERD_API
SerdWriter
serd_writer_new(SerdSyntax syntax,
+ SerdStyle style,
SerdNamespaces ns,
const SerdURI* base_uri,
SerdSink sink,
@@ -199,10 +345,22 @@ serd_writer_new(SerdSyntax syntax,
{
SerdWriter writer = malloc(sizeof(struct SerdWriterImpl));
writer->syntax = syntax;
+ writer->style = style;
writer->ns = ns;
writer->base_uri = *base_uri;
writer->sink = sink;
writer->stream = stream;
+ writer->prev_g = 0;
+ writer->prev_s = 0;
+ writer->prev_p = 0;
+ writer->prev_o = 0;
+ writer->indent = 0;
+ writer->write_node = write_node;
+ if ((style & SERD_STYLE_ABBREVIATED)) {
+ writer->write_statement = serd_writer_write_statement_abbrev;
+ } else {
+ writer->write_statement = serd_writer_write_statement_flat;
+ }
return writer;
}
@@ -216,6 +374,21 @@ serd_writer_set_base_uri(SerdWriter writer,
SERD_API
void
+serd_writer_set_prefix(SerdWriter writer,
+ const SerdString* name,
+ const SerdString* uri)
+{
+ if (writer->syntax != SERD_NTRIPLES) {
+ writer->sink("@prefix ", 8, writer->stream);
+ writer->sink(name->buf, name->n_bytes - 1, writer->stream);
+ writer->sink(": <", 3, writer->stream);
+ write_text(writer, WRITE_URI, uri->buf, uri->n_bytes - 1, '>');
+ writer->sink("> .\n", 4, writer->stream);
+ }
+}
+
+SERD_API
+void
serd_writer_free(SerdWriter writer)
{
SerdWriter const me = (SerdWriter)writer;
diff --git a/wscript b/wscript
index f3bf2830..e8f18366 100644
--- a/wscript
+++ b/wscript
@@ -124,10 +124,20 @@ def test(ctx):
autowaf.pre_test(ctx, APPNAME)
+ autowaf.run_tests(ctx, APPNAME,
+ ['./serdi_static > /dev/null',
+ './serdi_static ftp://example.org/unsupported.ttl > /dev/null'],
+ 1, name='serdi-fail')
+
+ autowaf.run_tests(ctx, APPNAME,
+ ['./serdi_static file:../tests/manifest.ttl > /dev/null',
+ './serdi_static ../tests/UTF-8.ttl > /dev/null'],
+ 0, name='serdi-succeed')
+
commands = []
for test in good_tests:
base_uri = 'http://www.w3.org/2001/sw/DataAccess/df1/' + test
- commands = commands + [ './serdi_static ../%s \'%s\' > %s.out' % (test, base_uri, test) ]
+ commands += [ './serdi_static ../%s \'%s\' > %s.out' % (test, base_uri, test) ]
autowaf.run_tests(ctx, APPNAME, commands, 0, name='good')
@@ -145,24 +155,16 @@ def test(ctx):
commands = []
for test in bad_tests:
- commands = commands + [ './serdi_static ../%s \'http://www.w3.org/2001/sw/DataAccess/df1/%s\' > %s.out' % (test, test, test) ]
+ commands += [ './serdi_static ../%s \'http://www.w3.org/2001/sw/DataAccess/df1/%s\' > %s.out' % (test, test, test) ]
autowaf.run_tests(ctx, APPNAME, commands, 1, name='bad')
- autowaf.run_tests(ctx, APPNAME,
- ['./serdi_static > /dev/null'],
- 1, name='serdi-no-args')
-
- autowaf.run_tests(ctx, APPNAME,
- ['./serdi_static file:../tests/manifest.ttl > /dev/null'],
- 0, name='serdi-file-uri')
-
- autowaf.run_tests(ctx, APPNAME,
- ['./serdi_static ftp://example.org/unsupported.ttl > /dev/null'],
- 1, name='serdi-bad-uri')
-
- autowaf.run_tests(ctx, APPNAME,
- ['./serdi_static ../tests/UTF-8.ttl > /dev/null'],
- 0, name='utf8')
+# commands = []
+# for test in good_tests:
+# out_filename = test + '.thru'
+# commands += [ './serdi_static -o turtle ../%s \'%s\' | ./serdi_static - \'%s\' > %s.out' % (test, base_uri, base_uri, test) ]
+#
+# autowaf.run_tests(ctx, APPNAME, commands, 0, name='turtle-write')
+#
autowaf.post_test(ctx, APPNAME)