From ff9510dc36fb3d6c5a85e3f4d41220c59b26ee93 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 21 Jan 2011 06:28:23 +0000 Subject: Preliminary support for writing abbreviated Turtle. git-svn-id: http://svn.drobilla.net/serd/trunk@22 490d8e77-9747-427b-9fa3-0b8f29cee8a0 --- src/serdi.c | 54 ++++++++++++-- src/writer.c | 227 ++++++++++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 247 insertions(+), 34 deletions(-) (limited to 'src') diff --git a/src/serdi.c b/src/serdi.c index 5db96388..a688b41b 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -90,6 +90,8 @@ event_prefix(void* handle, } else { serd_namespaces_add(state->ns, name, uri_string); } + serd_writer_set_prefix(state->writer, name, uri_string); + return true; } @@ -133,13 +135,39 @@ file_sink(const void* buf, size_t len, void* stream) int main(int argc, char** argv) { - if (argc != 2 && argc != 3) { + if (argc < 2) { return print_usage(argv[0], true); } - const uint8_t* in_filename = (const uint8_t*)argv[1]; + FILE* in_fd = NULL; + SerdSyntax output_syntax = SERD_NTRIPLES; + + int a = 1; + for (; a < argc && argv[a][0] == '-'; ++a) { + if (argv[a][1] == '\0') { + in_fd = stdin; + break; + } else if (argv[a][1] == 'o') { + if (++a == argc) { + fprintf(stderr, "missing value for -i\n"); + return 1; + } + if (!strcmp(argv[a], "turtle")) { + output_syntax = SERD_TURTLE; + } else if (!strcmp(argv[a], "ntriples")) { + output_syntax = SERD_NTRIPLES; + } else { + fprintf(stderr, "unknown output format `%s'\n", argv[a]); + } + } else { + fprintf(stderr, "unknown option `%s'\n", argv[a]); + return print_usage(argv[0], true); + } + } + + const uint8_t* in_filename = (const uint8_t*)argv[a]; - if (serd_uri_string_has_scheme(in_filename)) { + if (!in_fd && serd_uri_string_has_scheme(in_filename)) { // Input is an absolute URI, ensure it's a file: URI and chop scheme if (strncmp((const char*)in_filename, "file:", 5)) { fprintf(stderr, "unsupported URI scheme `%s'\n", in_filename); @@ -166,8 +194,11 @@ main(int argc, char** argv) serd_uri_parse(base_uri_str->buf, &base_uri); - FILE* const in_fd = fopen((const char*)in_filename, "r"); - FILE* out_fd = stdout; + if (!in_fd) { + in_fd = fopen((const char*)in_filename, "r"); + } + + FILE* out_fd = stdout; if (!in_fd) { fprintf(stderr, "failed to open file %s\n", in_filename); @@ -175,8 +206,14 @@ main(int argc, char** argv) } SerdNamespaces ns = serd_namespaces_new(); + + SerdStyle output_style = (output_syntax == SERD_NTRIPLES) + ? SERD_STYLE_ASCII + : SERD_STYLE_ABBREVIATED; + State state = { - serd_writer_new(SERD_NTRIPLES, ns, &base_uri, file_sink, out_fd), + serd_writer_new(output_syntax, output_style, + ns, &base_uri, file_sink, out_fd), ns, base_uri_str, base_uri }; @@ -186,8 +223,11 @@ main(int argc, char** argv) const bool success = serd_reader_read_file(reader, in_fd, in_filename); serd_reader_free(reader); fclose(in_fd); - serd_namespaces_free(state.ns); + + serd_writer_finish(state.writer); serd_writer_free(state.writer); + + serd_namespaces_free(state.ns); serd_string_free(state.base_uri_str); if (success) { diff --git a/src/writer.c b/src/writer.c index ef10594b..684f79e2 100644 --- a/src/writer.c +++ b/src/writer.c @@ -21,16 +21,48 @@ #include "serd/serd.h" +typedef bool (*StatementWriter)(SerdWriter writer, + const SerdString* graph, + const SerdString* subject, + SerdNodeType subject_type, + const SerdString* predicate, + SerdNodeType predicate_type, + const SerdString* object, + SerdNodeType object_type, + const SerdString* object_datatype, + const SerdString* object_lang); + +typedef bool (*NodeWriter)(SerdWriter writer, + SerdNodeType type, + const SerdString* str, + const SerdString* datatype, + const SerdString* lang); + struct SerdWriterImpl { - SerdSyntax syntax; - SerdNamespaces ns; - SerdURI base_uri; - SerdSink sink; - void* stream; + SerdSyntax syntax; + SerdStyle style; + SerdNamespaces ns; + SerdURI base_uri; + SerdSink sink; + void* stream; + StatementWriter write_statement; + NodeWriter write_node; + const SerdString* prev_g; + const SerdString* prev_s; + const SerdString* prev_p; + const SerdString* prev_o; + unsigned indent; }; +typedef enum { + WRITE_NORMAL, + WRITE_URI, + WRITE_STRING +} WriteContext; + static bool -serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const uint8_t esc) +write_text(SerdWriter writer, WriteContext ctx, + const uint8_t* utf8, size_t n_bytes, uint8_t terminator) { char escape[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; for (size_t i = 0; i < n_bytes;) { @@ -40,12 +72,16 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u case '\n': writer->sink("\\n", 2, writer->stream); continue; case '\r': writer->sink("\\r", 2, writer->stream); continue; case '\t': writer->sink("\\t", 2, writer->stream); continue; - case '"': if (esc == '"') { writer->sink("\\\"", 2, writer->stream); continue; } + case '"': + if (terminator == '"') { + writer->sink("\\\"", 2, writer->stream); + continue; + } // else fall-through default: break; } - if (in == esc) { - sprintf(escape, "\\u%04X", esc); + if (in == terminator) { + sprintf(escape, "\\u%04X", terminator); writer->sink(escape, 6, writer->stream); continue; } @@ -79,6 +115,13 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u return false; } + if (!(writer->style & SERD_STYLE_ASCII)) { + // Write UTF-8 input directly to UTF-8 output + writer->sink(utf8, n_bytes, writer->stream); + i += n_bytes - 1; + continue; + } + #define READ_BYTE() do { \ assert(i < n_bytes); \ in = utf8[i++] & 0x3f; \ @@ -106,11 +149,11 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u } static bool -serd_write_node(SerdWriter writer, - SerdNodeType type, - const SerdString* str, - const SerdString* datatype, - const SerdString* lang) +write_node(SerdWriter writer, + SerdNodeType type, + const SerdString* str, + const SerdString* datatype, + const SerdString* lang) { const SerdURI* base_uri = &writer->base_uri; SerdNamespaces ns = writer->ns; @@ -123,14 +166,20 @@ serd_write_node(SerdWriter writer, writer->sink(str->buf, str->n_bytes - 1, writer->stream); break; case QNAME: - if (!serd_namespaces_expand(ns, str, &uri_prefix, &uri_suffix)) { - fprintf(stderr, "error: undefined namespace prefix `%s'\n", str->buf); - return false; + switch (writer->syntax) { + case SERD_NTRIPLES: + if (!serd_namespaces_expand(ns, str, &uri_prefix, &uri_suffix)) { + fprintf(stderr, "error: undefined namespace prefix `%s'\n", str->buf); + return false; + } + writer->sink("<", 1, writer->stream); + write_text(writer, WRITE_URI, uri_prefix.buf, uri_prefix.len, '>'); + write_text(writer, WRITE_URI, uri_suffix.buf, uri_suffix.len, '>'); + writer->sink(">", 1, writer->stream); + break; + case SERD_TURTLE: + writer->sink(str->buf, str->n_bytes - 1, writer->stream); } - writer->sink("<", 1, writer->stream); - serd_write_ascii(writer, uri_prefix.buf, uri_prefix.len, '>'); - serd_write_ascii(writer, uri_suffix.buf, uri_suffix.len, '>'); - writer->sink(">", 1, writer->stream); break; case URI: if (!serd_uri_string_has_scheme(str->buf)) { @@ -146,27 +195,44 @@ serd_write_node(SerdWriter writer, } } else { writer->sink("<", 1, writer->stream); - serd_write_ascii(writer, str->buf, str->n_bytes - 1, '>'); + write_text(writer, WRITE_URI, str->buf, str->n_bytes - 1, '>'); writer->sink(">", 1, writer->stream); return true; } return false; case LITERAL: writer->sink("\"", 1, writer->stream); - serd_write_ascii(writer, str->buf, str->n_bytes - 1, '"'); + write_text(writer, WRITE_STRING, str->buf, str->n_bytes - 1, '"'); writer->sink("\"", 1, writer->stream); if (lang) { writer->sink("@", 1, writer->stream); writer->sink(lang->buf, lang->n_bytes - 1, writer->stream); } else if (datatype) { writer->sink("^^", 2, writer->stream); - serd_write_node(writer, URI, datatype, NULL, NULL); + write_node(writer, URI, datatype, NULL, NULL); } break; } return true; } +static void +serd_writer_write_delim(SerdWriter writer, const uint8_t delim) +{ + switch (delim) { + case 0: + case '\n': + break; + default: + writer->sink(" ", 1, writer->stream); + writer->sink(&delim, 1, writer->stream); + } + writer->sink("\n", 1, writer->stream); + for (unsigned i = 0; i < writer->indent; ++i) { + writer->sink("\t", 1, writer->stream); + } +} + SERD_API bool serd_writer_write_statement(SerdWriter writer, @@ -180,18 +246,98 @@ serd_writer_write_statement(SerdWriter writer, const SerdString* object_datatype, const SerdString* object_lang) { - serd_write_node(writer, subject_type, subject, NULL, NULL); + return writer->write_statement(writer, + graph, + subject, subject_type, + predicate, predicate_type, + object, object_type, object_datatype, object_lang); +} + +static bool +serd_writer_write_statement_abbrev(SerdWriter writer, + const SerdString* graph, + const SerdString* subject, + SerdNodeType subject_type, + const SerdString* predicate, + SerdNodeType predicate_type, + const SerdString* object, + SerdNodeType object_type, + const SerdString* object_datatype, + const SerdString* object_lang) +{ + assert(subject && predicate && object); + if (subject == writer->prev_s) { + if (predicate == writer->prev_p) { + ++writer->indent; + serd_writer_write_delim(writer, ','); + write_node(writer, object_type, object, object_datatype, object_lang); + --writer->indent; + } else { + serd_writer_write_delim(writer, ';'); + write_node(writer, predicate_type, predicate, NULL, NULL); + writer->sink(" ", 1, writer->stream); + write_node(writer, object_type, object, object_datatype, object_lang); + } + } else { + if (writer->prev_s) { + --writer->indent; + serd_writer_write_delim(writer, '.'); + serd_writer_write_delim(writer, '\n'); + } + write_node(writer, subject_type, subject, NULL, NULL); + ++writer->indent; + serd_writer_write_delim(writer, 0); + + writer->sink(" ", 1, writer->stream); + write_node(writer, predicate_type, predicate, NULL, NULL); + writer->sink(" ", 1, writer->stream); + + write_node(writer, object_type, object, object_datatype, object_lang); + } + + writer->prev_g = graph; + writer->prev_s = subject; + writer->prev_p = predicate; + writer->prev_o = object; + return true; +} + +SERD_API +bool +serd_writer_write_statement_flat(SerdWriter writer, + const SerdString* graph, + const SerdString* subject, + SerdNodeType subject_type, + const SerdString* predicate, + SerdNodeType predicate_type, + const SerdString* object, + SerdNodeType object_type, + const SerdString* object_datatype, + const SerdString* object_lang) +{ + assert(subject && predicate && object); + write_node(writer, subject_type, subject, NULL, NULL); writer->sink(" ", 1, writer->stream); - serd_write_node(writer, predicate_type, predicate, NULL, NULL); + write_node(writer, predicate_type, predicate, NULL, NULL); writer->sink(" ", 1, writer->stream); - serd_write_node(writer, object_type, object, object_datatype, object_lang); + write_node(writer, object_type, object, object_datatype, object_lang); writer->sink(" .\n", 3, writer->stream); return true; } +SERD_API +void +serd_writer_finish(SerdWriter writer) +{ + if (writer->prev_s) { + writer->sink(" .\n", 3, writer->stream); + } +} + SERD_API SerdWriter serd_writer_new(SerdSyntax syntax, + SerdStyle style, SerdNamespaces ns, const SerdURI* base_uri, SerdSink sink, @@ -199,10 +345,22 @@ serd_writer_new(SerdSyntax syntax, { SerdWriter writer = malloc(sizeof(struct SerdWriterImpl)); writer->syntax = syntax; + writer->style = style; writer->ns = ns; writer->base_uri = *base_uri; writer->sink = sink; writer->stream = stream; + writer->prev_g = 0; + writer->prev_s = 0; + writer->prev_p = 0; + writer->prev_o = 0; + writer->indent = 0; + writer->write_node = write_node; + if ((style & SERD_STYLE_ABBREVIATED)) { + writer->write_statement = serd_writer_write_statement_abbrev; + } else { + writer->write_statement = serd_writer_write_statement_flat; + } return writer; } @@ -214,6 +372,21 @@ serd_writer_set_base_uri(SerdWriter writer, writer->base_uri = *uri; } +SERD_API +void +serd_writer_set_prefix(SerdWriter writer, + const SerdString* name, + const SerdString* uri) +{ + if (writer->syntax != SERD_NTRIPLES) { + writer->sink("@prefix ", 8, writer->stream); + writer->sink(name->buf, name->n_bytes - 1, writer->stream); + writer->sink(": <", 3, writer->stream); + write_text(writer, WRITE_URI, uri->buf, uri->n_bytes - 1, '>'); + writer->sink("> .\n", 4, writer->stream); + } +} + SERD_API void serd_writer_free(SerdWriter writer) -- cgit v1.2.1