diff options
-rw-r--r-- | serd/serd.h | 21 | ||||
-rw-r--r-- | src/serdi.c | 54 | ||||
-rw-r--r-- | src/writer.c | 227 | ||||
-rw-r--r-- | wscript | 36 |
4 files changed, 286 insertions, 52 deletions
diff --git a/serd/serd.h b/serd/serd.h index 8efaab20..d350cf4c 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -244,10 +244,17 @@ serd_reader_free(SerdReader reader); * @{ */ +typedef enum { + SERD_STYLE_ABBREVIATED = 1, + SERD_STYLE_ASCII = 1 << 1, + SERD_STYLE_ESCAPE_ +} SerdStyle; + /** Create a new RDF writer. */ SERD_API SerdWriter serd_writer_new(SerdSyntax syntax, + SerdStyle style, SerdNamespaces ns, const SerdURI* base_uri, SerdSink sink, @@ -258,12 +265,19 @@ SERD_API void serd_writer_free(SerdWriter writer); -/** Set the base URI of writer. */ +/** Set the current output base URI. */ SERD_API void serd_writer_set_base_uri(SerdWriter writer, const SerdURI* uri); +/** Set the current output base URI. */ +SERD_API +void +serd_writer_set_prefix(SerdWriter writer, + const SerdString* name, + const SerdString* uri); + /** Write a statement. */ SERD_API bool @@ -278,6 +292,11 @@ serd_writer_write_statement(SerdWriter writer, const SerdString* object_datatype, const SerdString* object_lang); +/** Finish a write. */ +SERD_API +void +serd_writer_finish(SerdWriter writer); + /** @} */ /** @} */ diff --git a/src/serdi.c b/src/serdi.c index 5db96388..a688b41b 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -90,6 +90,8 @@ event_prefix(void* handle, } else { serd_namespaces_add(state->ns, name, uri_string); } + serd_writer_set_prefix(state->writer, name, uri_string); + return true; } @@ -133,13 +135,39 @@ file_sink(const void* buf, size_t len, void* stream) int main(int argc, char** argv) { - if (argc != 2 && argc != 3) { + if (argc < 2) { return print_usage(argv[0], true); } - const uint8_t* in_filename = (const uint8_t*)argv[1]; + FILE* in_fd = NULL; + SerdSyntax output_syntax = SERD_NTRIPLES; + + int a = 1; + for (; a < argc && argv[a][0] == '-'; ++a) { + if (argv[a][1] == '\0') { + in_fd = stdin; + break; + } else if (argv[a][1] == 'o') { + if (++a == argc) { + fprintf(stderr, "missing value for -i\n"); + return 1; + } + if (!strcmp(argv[a], "turtle")) { + output_syntax = SERD_TURTLE; + } else if (!strcmp(argv[a], "ntriples")) { + output_syntax = SERD_NTRIPLES; + } else { + fprintf(stderr, "unknown output format `%s'\n", argv[a]); + } + } else { + fprintf(stderr, "unknown option `%s'\n", argv[a]); + return print_usage(argv[0], true); + } + } + + const uint8_t* in_filename = (const uint8_t*)argv[a]; - if (serd_uri_string_has_scheme(in_filename)) { + if (!in_fd && serd_uri_string_has_scheme(in_filename)) { // Input is an absolute URI, ensure it's a file: URI and chop scheme if (strncmp((const char*)in_filename, "file:", 5)) { fprintf(stderr, "unsupported URI scheme `%s'\n", in_filename); @@ -166,8 +194,11 @@ main(int argc, char** argv) serd_uri_parse(base_uri_str->buf, &base_uri); - FILE* const in_fd = fopen((const char*)in_filename, "r"); - FILE* out_fd = stdout; + if (!in_fd) { + in_fd = fopen((const char*)in_filename, "r"); + } + + FILE* out_fd = stdout; if (!in_fd) { fprintf(stderr, "failed to open file %s\n", in_filename); @@ -175,8 +206,14 @@ main(int argc, char** argv) } SerdNamespaces ns = serd_namespaces_new(); + + SerdStyle output_style = (output_syntax == SERD_NTRIPLES) + ? SERD_STYLE_ASCII + : SERD_STYLE_ABBREVIATED; + State state = { - serd_writer_new(SERD_NTRIPLES, ns, &base_uri, file_sink, out_fd), + serd_writer_new(output_syntax, output_style, + ns, &base_uri, file_sink, out_fd), ns, base_uri_str, base_uri }; @@ -186,8 +223,11 @@ main(int argc, char** argv) const bool success = serd_reader_read_file(reader, in_fd, in_filename); serd_reader_free(reader); fclose(in_fd); - serd_namespaces_free(state.ns); + + serd_writer_finish(state.writer); serd_writer_free(state.writer); + + serd_namespaces_free(state.ns); serd_string_free(state.base_uri_str); if (success) { diff --git a/src/writer.c b/src/writer.c index ef10594b..684f79e2 100644 --- a/src/writer.c +++ b/src/writer.c @@ -21,16 +21,48 @@ #include "serd/serd.h" +typedef bool (*StatementWriter)(SerdWriter writer, + const SerdString* graph, + const SerdString* subject, + SerdNodeType subject_type, + const SerdString* predicate, + SerdNodeType predicate_type, + const SerdString* object, + SerdNodeType object_type, + const SerdString* object_datatype, + const SerdString* object_lang); + +typedef bool (*NodeWriter)(SerdWriter writer, + SerdNodeType type, + const SerdString* str, + const SerdString* datatype, + const SerdString* lang); + struct SerdWriterImpl { - SerdSyntax syntax; - SerdNamespaces ns; - SerdURI base_uri; - SerdSink sink; - void* stream; + SerdSyntax syntax; + SerdStyle style; + SerdNamespaces ns; + SerdURI base_uri; + SerdSink sink; + void* stream; + StatementWriter write_statement; + NodeWriter write_node; + const SerdString* prev_g; + const SerdString* prev_s; + const SerdString* prev_p; + const SerdString* prev_o; + unsigned indent; }; +typedef enum { + WRITE_NORMAL, + WRITE_URI, + WRITE_STRING +} WriteContext; + static bool -serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const uint8_t esc) +write_text(SerdWriter writer, WriteContext ctx, + const uint8_t* utf8, size_t n_bytes, uint8_t terminator) { char escape[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; for (size_t i = 0; i < n_bytes;) { @@ -40,12 +72,16 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u case '\n': writer->sink("\\n", 2, writer->stream); continue; case '\r': writer->sink("\\r", 2, writer->stream); continue; case '\t': writer->sink("\\t", 2, writer->stream); continue; - case '"': if (esc == '"') { writer->sink("\\\"", 2, writer->stream); continue; } + case '"': + if (terminator == '"') { + writer->sink("\\\"", 2, writer->stream); + continue; + } // else fall-through default: break; } - if (in == esc) { - sprintf(escape, "\\u%04X", esc); + if (in == terminator) { + sprintf(escape, "\\u%04X", terminator); writer->sink(escape, 6, writer->stream); continue; } @@ -79,6 +115,13 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u return false; } + if (!(writer->style & SERD_STYLE_ASCII)) { + // Write UTF-8 input directly to UTF-8 output + writer->sink(utf8, n_bytes, writer->stream); + i += n_bytes - 1; + continue; + } + #define READ_BYTE() do { \ assert(i < n_bytes); \ in = utf8[i++] & 0x3f; \ @@ -106,11 +149,11 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u } static bool -serd_write_node(SerdWriter writer, - SerdNodeType type, - const SerdString* str, - const SerdString* datatype, - const SerdString* lang) +write_node(SerdWriter writer, + SerdNodeType type, + const SerdString* str, + const SerdString* datatype, + const SerdString* lang) { const SerdURI* base_uri = &writer->base_uri; SerdNamespaces ns = writer->ns; @@ -123,14 +166,20 @@ serd_write_node(SerdWriter writer, writer->sink(str->buf, str->n_bytes - 1, writer->stream); break; case QNAME: - if (!serd_namespaces_expand(ns, str, &uri_prefix, &uri_suffix)) { - fprintf(stderr, "error: undefined namespace prefix `%s'\n", str->buf); - return false; + switch (writer->syntax) { + case SERD_NTRIPLES: + if (!serd_namespaces_expand(ns, str, &uri_prefix, &uri_suffix)) { + fprintf(stderr, "error: undefined namespace prefix `%s'\n", str->buf); + return false; + } + writer->sink("<", 1, writer->stream); + write_text(writer, WRITE_URI, uri_prefix.buf, uri_prefix.len, '>'); + write_text(writer, WRITE_URI, uri_suffix.buf, uri_suffix.len, '>'); + writer->sink(">", 1, writer->stream); + break; + case SERD_TURTLE: + writer->sink(str->buf, str->n_bytes - 1, writer->stream); } - writer->sink("<", 1, writer->stream); - serd_write_ascii(writer, uri_prefix.buf, uri_prefix.len, '>'); - serd_write_ascii(writer, uri_suffix.buf, uri_suffix.len, '>'); - writer->sink(">", 1, writer->stream); break; case URI: if (!serd_uri_string_has_scheme(str->buf)) { @@ -146,27 +195,44 @@ serd_write_node(SerdWriter writer, } } else { writer->sink("<", 1, writer->stream); - serd_write_ascii(writer, str->buf, str->n_bytes - 1, '>'); + write_text(writer, WRITE_URI, str->buf, str->n_bytes - 1, '>'); writer->sink(">", 1, writer->stream); return true; } return false; case LITERAL: writer->sink("\"", 1, writer->stream); - serd_write_ascii(writer, str->buf, str->n_bytes - 1, '"'); + write_text(writer, WRITE_STRING, str->buf, str->n_bytes - 1, '"'); writer->sink("\"", 1, writer->stream); if (lang) { writer->sink("@", 1, writer->stream); writer->sink(lang->buf, lang->n_bytes - 1, writer->stream); } else if (datatype) { writer->sink("^^", 2, writer->stream); - serd_write_node(writer, URI, datatype, NULL, NULL); + write_node(writer, URI, datatype, NULL, NULL); } break; } return true; } +static void +serd_writer_write_delim(SerdWriter writer, const uint8_t delim) +{ + switch (delim) { + case 0: + case '\n': + break; + default: + writer->sink(" ", 1, writer->stream); + writer->sink(&delim, 1, writer->stream); + } + writer->sink("\n", 1, writer->stream); + for (unsigned i = 0; i < writer->indent; ++i) { + writer->sink("\t", 1, writer->stream); + } +} + SERD_API bool serd_writer_write_statement(SerdWriter writer, @@ -180,18 +246,98 @@ serd_writer_write_statement(SerdWriter writer, const SerdString* object_datatype, const SerdString* object_lang) { - serd_write_node(writer, subject_type, subject, NULL, NULL); + return writer->write_statement(writer, + graph, + subject, subject_type, + predicate, predicate_type, + object, object_type, object_datatype, object_lang); +} + +static bool +serd_writer_write_statement_abbrev(SerdWriter writer, + const SerdString* graph, + const SerdString* subject, + SerdNodeType subject_type, + const SerdString* predicate, + SerdNodeType predicate_type, + const SerdString* object, + SerdNodeType object_type, + const SerdString* object_datatype, + const SerdString* object_lang) +{ + assert(subject && predicate && object); + if (subject == writer->prev_s) { + if (predicate == writer->prev_p) { + ++writer->indent; + serd_writer_write_delim(writer, ','); + write_node(writer, object_type, object, object_datatype, object_lang); + --writer->indent; + } else { + serd_writer_write_delim(writer, ';'); + write_node(writer, predicate_type, predicate, NULL, NULL); + writer->sink(" ", 1, writer->stream); + write_node(writer, object_type, object, object_datatype, object_lang); + } + } else { + if (writer->prev_s) { + --writer->indent; + serd_writer_write_delim(writer, '.'); + serd_writer_write_delim(writer, '\n'); + } + write_node(writer, subject_type, subject, NULL, NULL); + ++writer->indent; + serd_writer_write_delim(writer, 0); + + writer->sink(" ", 1, writer->stream); + write_node(writer, predicate_type, predicate, NULL, NULL); + writer->sink(" ", 1, writer->stream); + + write_node(writer, object_type, object, object_datatype, object_lang); + } + + writer->prev_g = graph; + writer->prev_s = subject; + writer->prev_p = predicate; + writer->prev_o = object; + return true; +} + +SERD_API +bool +serd_writer_write_statement_flat(SerdWriter writer, + const SerdString* graph, + const SerdString* subject, + SerdNodeType subject_type, + const SerdString* predicate, + SerdNodeType predicate_type, + const SerdString* object, + SerdNodeType object_type, + const SerdString* object_datatype, + const SerdString* object_lang) +{ + assert(subject && predicate && object); + write_node(writer, subject_type, subject, NULL, NULL); writer->sink(" ", 1, writer->stream); - serd_write_node(writer, predicate_type, predicate, NULL, NULL); + write_node(writer, predicate_type, predicate, NULL, NULL); writer->sink(" ", 1, writer->stream); - serd_write_node(writer, object_type, object, object_datatype, object_lang); + write_node(writer, object_type, object, object_datatype, object_lang); writer->sink(" .\n", 3, writer->stream); return true; } SERD_API +void +serd_writer_finish(SerdWriter writer) +{ + if (writer->prev_s) { + writer->sink(" .\n", 3, writer->stream); + } +} + +SERD_API SerdWriter serd_writer_new(SerdSyntax syntax, + SerdStyle style, SerdNamespaces ns, const SerdURI* base_uri, SerdSink sink, @@ -199,10 +345,22 @@ serd_writer_new(SerdSyntax syntax, { SerdWriter writer = malloc(sizeof(struct SerdWriterImpl)); writer->syntax = syntax; + writer->style = style; writer->ns = ns; writer->base_uri = *base_uri; writer->sink = sink; writer->stream = stream; + writer->prev_g = 0; + writer->prev_s = 0; + writer->prev_p = 0; + writer->prev_o = 0; + writer->indent = 0; + writer->write_node = write_node; + if ((style & SERD_STYLE_ABBREVIATED)) { + writer->write_statement = serd_writer_write_statement_abbrev; + } else { + writer->write_statement = serd_writer_write_statement_flat; + } return writer; } @@ -216,6 +374,21 @@ serd_writer_set_base_uri(SerdWriter writer, SERD_API void +serd_writer_set_prefix(SerdWriter writer, + const SerdString* name, + const SerdString* uri) +{ + if (writer->syntax != SERD_NTRIPLES) { + writer->sink("@prefix ", 8, writer->stream); + writer->sink(name->buf, name->n_bytes - 1, writer->stream); + writer->sink(": <", 3, writer->stream); + write_text(writer, WRITE_URI, uri->buf, uri->n_bytes - 1, '>'); + writer->sink("> .\n", 4, writer->stream); + } +} + +SERD_API +void serd_writer_free(SerdWriter writer) { SerdWriter const me = (SerdWriter)writer; @@ -124,10 +124,20 @@ def test(ctx): autowaf.pre_test(ctx, APPNAME) + autowaf.run_tests(ctx, APPNAME, + ['./serdi_static > /dev/null', + './serdi_static ftp://example.org/unsupported.ttl > /dev/null'], + 1, name='serdi-fail') + + autowaf.run_tests(ctx, APPNAME, + ['./serdi_static file:../tests/manifest.ttl > /dev/null', + './serdi_static ../tests/UTF-8.ttl > /dev/null'], + 0, name='serdi-succeed') + commands = [] for test in good_tests: base_uri = 'http://www.w3.org/2001/sw/DataAccess/df1/' + test - commands = commands + [ './serdi_static ../%s \'%s\' > %s.out' % (test, base_uri, test) ] + commands += [ './serdi_static ../%s \'%s\' > %s.out' % (test, base_uri, test) ] autowaf.run_tests(ctx, APPNAME, commands, 0, name='good') @@ -145,24 +155,16 @@ def test(ctx): commands = [] for test in bad_tests: - commands = commands + [ './serdi_static ../%s \'http://www.w3.org/2001/sw/DataAccess/df1/%s\' > %s.out' % (test, test, test) ] + commands += [ './serdi_static ../%s \'http://www.w3.org/2001/sw/DataAccess/df1/%s\' > %s.out' % (test, test, test) ] autowaf.run_tests(ctx, APPNAME, commands, 1, name='bad') - autowaf.run_tests(ctx, APPNAME, - ['./serdi_static > /dev/null'], - 1, name='serdi-no-args') - - autowaf.run_tests(ctx, APPNAME, - ['./serdi_static file:../tests/manifest.ttl > /dev/null'], - 0, name='serdi-file-uri') - - autowaf.run_tests(ctx, APPNAME, - ['./serdi_static ftp://example.org/unsupported.ttl > /dev/null'], - 1, name='serdi-bad-uri') - - autowaf.run_tests(ctx, APPNAME, - ['./serdi_static ../tests/UTF-8.ttl > /dev/null'], - 0, name='utf8') +# commands = [] +# for test in good_tests: +# out_filename = test + '.thru' +# commands += [ './serdi_static -o turtle ../%s \'%s\' | ./serdi_static - \'%s\' > %s.out' % (test, base_uri, base_uri, test) ] +# +# autowaf.run_tests(ctx, APPNAME, commands, 0, name='turtle-write') +# autowaf.post_test(ctx, APPNAME) |