From 192508f36451afd1cd5f74ea73f29de9d43308d4 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Mon, 20 Feb 2012 00:36:21 +0000 Subject: Implement pretty-printing for collections. git-svn-id: http://svn.drobilla.net/serd/trunk@305 490d8e77-9747-427b-9fa3-0b8f29cee8a0 --- src/reader.c | 96 ++++++++++++++-------------- src/writer.c | 201 ++++++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 203 insertions(+), 94 deletions(-) (limited to 'src') diff --git a/src/reader.c b/src/reader.c index e365e45f..38d0c21a 100644 --- a/src/reader.c +++ b/src/reader.c @@ -277,7 +277,7 @@ emit_statement(SerdReader* reader, SerdStatementFlags* flags, deref(reader, o), deref(reader, d), deref(reader, l)); - *flags = (*flags & SERD_ANON_CONT) ? SERD_ANON_CONT : 0; + *flags &= SERD_ANON_CONT|SERD_LIST_CONT; // Preserve only cont flags return ret; } @@ -1008,7 +1008,7 @@ blank_id(SerdReader* reader) static bool read_blank(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest) { - const bool was_anon_subject = subject && (*ctx.flags | SERD_ANON_CONT); + const SerdStatementFlags old_flags = *ctx.flags; switch (peek_byte(reader)) { case '_': *dest = read_nodeID(reader); @@ -1016,25 +1016,29 @@ read_blank(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest) case '[': eat_byte_safe(reader, '['); read_ws_star(reader); - *dest = blank_id(reader); - if (peek_byte(reader) == ']') { - eat_byte_safe(reader, ']'); + + const bool empty = (peek_byte(reader) == ']'); + if (empty) { *ctx.flags |= (subject) ? SERD_EMPTY_S : SERD_EMPTY_O; - if (ctx.subject) { - TRY_RET(emit_statement(reader, ctx.flags, - ctx.graph, ctx.subject, ctx.predicate, - *dest, 0, 0)); - } - return true; + } else { + *ctx.flags |= (subject) ? SERD_ANON_S_BEGIN : SERD_ANON_O_BEGIN; } - *ctx.flags |= (subject) ? SERD_ANON_S_BEGIN : SERD_ANON_O_BEGIN; + *dest = blank_id(reader); if (ctx.subject) { TRY_RET(emit_statement(reader, ctx.flags, ctx.graph, ctx.subject, ctx.predicate, *dest, 0, 0)); } + ctx.subject = *dest; + *ctx.flags &= ~(SERD_LIST_CONT); + + if (empty) { + eat_byte_safe(reader, ']'); + return true; + } + if (!subject) { *ctx.flags |= SERD_ANON_CONT; } @@ -1044,20 +1048,10 @@ read_blank(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest) if (reader->end_sink) { reader->end_sink(reader->handle, deref(reader, *dest)); } - if (!was_anon_subject) { - *ctx.flags &= ~SERD_ANON_CONT; - } + *ctx.flags = old_flags; return true; case '(': - if (read_collection(reader, ctx, dest)) { - if (ctx.subject) { - TRY_RET(emit_statement(reader, ctx.flags, - ctx.graph, ctx.subject, ctx.predicate, - *dest, 0, 0)); - } - return true; - } - return false; + return read_collection(reader, ctx, dest); default: return error(reader, "illegal blank node\n"); } @@ -1199,32 +1193,23 @@ static bool read_collection_rec(SerdReader* reader, ReadContext ctx) { read_ws_star(reader); - if (peek_byte(reader) == ')') { + const bool end = (peek_byte(reader) == ')'); + const Ref rest = (end ? reader->rdf_nil : blank_id(reader)); + *ctx.flags |= SERD_LIST_CONT; + TRY_RET(emit_statement(reader, ctx.flags, ctx.graph, + ctx.subject, reader->rdf_rest, rest, 0, 0)); + + if (end) { eat_byte_safe(reader, ')'); - TRY_RET(emit_statement(reader, ctx.flags, - 0, - ctx.subject, - reader->rdf_rest, - reader->rdf_nil, 0, 0)); - return false; } else { - const Ref rest = blank_id(reader); - TRY_RET(emit_statement(reader, ctx.flags, - ctx.graph, - ctx.subject, - reader->rdf_rest, - rest, 0, 0)); - ctx.subject = rest; - ctx.predicate = reader->rdf_first; + ctx.subject = rest; + ctx.predicate = reader->rdf_first; if (read_object(reader, ctx)) { - read_collection_rec(reader, ctx); - pop_node(reader, rest); - return true; - } else { - pop_node(reader, rest); - return false; + TRY_RET(read_collection_rec(reader, ctx)); } } + *ctx.flags &= ~SERD_LIST_CONT; + return true; } // [22] itemList ::= object+ @@ -1232,22 +1217,33 @@ read_collection_rec(SerdReader* reader, ReadContext ctx) static bool read_collection(SerdReader* reader, ReadContext ctx, Ref* dest) { - TRY_RET(eat_byte_safe(reader, '(')); + eat_byte_safe(reader, '('); read_ws_star(reader); - if (peek_byte(reader) == ')') { // Empty collection - eat_byte_safe(reader, ')'); + + if (peek_byte(reader) == ')') { *dest = reader->rdf_nil; + if (ctx.subject) { + TRY_RET(emit_statement(reader, ctx.flags, ctx.graph, + ctx.subject, ctx.predicate, *dest, 0, 0)); + } + eat_byte_safe(reader, ')'); return true; } - *dest = blank_id(reader); + *dest = blank_id(reader); + *ctx.flags |= (!ctx.subject) ? SERD_LIST_S_BEGIN : SERD_LIST_O_BEGIN; + + if (ctx.subject) { + TRY_RET(emit_statement(reader, ctx.flags, ctx.graph, + ctx.subject, ctx.predicate, *dest, 0, 0)); + *ctx.flags = SERD_LIST_CONT; + } ctx.subject = *dest; ctx.predicate = reader->rdf_first; if (!read_object(reader, ctx)) { return error(reader, "unexpected end of collection\n"); } - ctx.subject = *dest; return read_collection_rec(reader, ctx); } diff --git a/src/writer.c b/src/writer.c index 469e0090..84bd465b 100644 --- a/src/writer.c +++ b/src/writer.c @@ -36,6 +36,43 @@ static const WriteContext WRITE_CONTEXT_NULL = { { 0, 0, 0, 0, SERD_NOTHING } }; +typedef enum { + SEP_NONE, + SEP_END_S, ///< End of a subject ('.') + SEP_END_P, ///< End of a predicate (';') + SEP_END_O, ///< End of an object (',') + SEP_S_P, ///< Between a subject and predicate (whitespace) + SEP_P_O, ///< Between a predicate and object (whitespace) + SEP_ANON_BEGIN, ///< Start of anonymous node ('[') + SEP_ANON_END, ///< End of anonymous node (']') + SEP_LIST_BEGIN, ///< Start of list ('(') + SEP_LIST_SEP, ///< List separator (whitespace) + SEP_LIST_END ///< End of list (')') +} Sep; + +typedef struct { + const char* str; ///< Sep string + uint8_t len; ///< Length of sep string + uint8_t space_before; ///< Newline before sep + uint8_t space_after_node; ///< Newline after sep if after node + uint8_t space_after_sep; ///< Newline after sep if after sep +} SepRule; + +static const SepRule rules[] = { + { NULL, 0, 0, 0, 0 }, + { " .\n\n", 4, 0, 0, 0 }, + { " ;", 2, 0, 1, 1 }, + { " ,", 2, 0, 1, 0 }, + { NULL, 0, 0, 1, 0 }, + { " ", 1, 0, 0, 0 }, + { "[", 1, 0, 1, 1 }, + { "]", 1, 0, 0, 0 }, + { "(", 1, 0, 0, 0 }, + { NULL, 1, 0, 1, 0 }, + { ")", 1, 1, 0, 0 }, + { "\n", 1, 0, 1, 0 } +}; + struct SerdWriterImpl { SerdSyntax syntax; SerdStyle style; @@ -46,9 +83,12 @@ struct SerdWriterImpl { SerdSink sink; void* stream; WriteContext context; + SerdNode list_subj; + unsigned list_depth; uint8_t* bprefix; size_t bprefix_len; unsigned indent; + Sep last_sep; bool empty; }; @@ -202,22 +242,33 @@ write_text(SerdWriter* writer, TextContext ctx, } static void -serd_writer_write_delim(SerdWriter* writer, const uint8_t delim) +write_newline(SerdWriter* writer) { - switch (delim) { - case '\n': - break; - default: - sink(" ", 1, writer); - case '[': - sink(&delim, 1, writer); - } sink("\n", 1, writer); for (unsigned i = 0; i < writer->indent; ++i) { sink("\t", 1, writer); } } +static void +write_sep(SerdWriter* writer, const Sep sep) +{ + const SepRule* rule = &rules[sep]; + if (rule->space_before) { + write_newline(writer); + } + if (rule->str) { + sink(rule->str, rule->len, writer); + } + if ( (writer->last_sep && rule->space_after_sep) + || (!writer->last_sep && rule->space_after_node)) { + write_newline(writer); + } else if (writer->last_sep && rule->space_after_node) { + sink(" ", 1, writer); + } + writer->last_sep = sep; +} + static void reset_context(SerdWriter* writer, bool del) { @@ -257,10 +308,22 @@ write_node(SerdWriter* writer, && ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) || (field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN)))) { ++writer->indent; - serd_writer_write_delim(writer, '['); + write_sep(writer, SEP_ANON_BEGIN); + } else if (writer->syntax != SERD_NTRIPLES + && (field == FIELD_SUBJECT && (flags & SERD_LIST_S_BEGIN))) { + assert(writer->list_depth == 0); + copy_node(&writer->list_subj, node); + ++writer->list_depth; + ++writer->indent; + write_sep(writer, SEP_LIST_BEGIN); + } else if (writer->syntax != SERD_NTRIPLES + && (field == FIELD_OBJECT && (flags & SERD_LIST_O_BEGIN))) { + ++writer->indent; + ++writer->list_depth; + write_sep(writer, SEP_LIST_BEGIN); } else if (writer->syntax != SERD_NTRIPLES - && ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S)) - || (field == FIELD_OBJECT && (flags & SERD_EMPTY_O)))) { + && ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S)) + || (field == FIELD_OBJECT && (flags & SERD_EMPTY_O)))) { sink("[]", 2, writer); } else { sink("_:", 2, writer); @@ -325,7 +388,11 @@ write_node(SerdWriter* writer, if ((writer->syntax == SERD_TURTLE) && !strcmp((const char*)node->buf, NS_RDF "type")) { sink("a", 1, writer); - return true; + break; + } else if ((writer->syntax == SERD_TURTLE) + && !strcmp((const char*)node->buf, NS_RDF "nil")) { + sink("()", 2, writer); + break; } else if ((writer->style & SERD_STYLE_CURIED) && serd_uri_string_has_scheme(node->buf)) { SerdNode prefix; @@ -334,7 +401,7 @@ write_node(SerdWriter* writer, write_text(writer, WRITE_URI, prefix.buf, prefix.n_bytes, '>'); sink(":", 1, writer); write_text(writer, WRITE_URI, suffix.buf, suffix.len, '>'); - return true; + break; } } else if ((writer->style & SERD_STYLE_RESOLVED) && !serd_uri_string_has_scheme(node->buf)) { @@ -345,7 +412,7 @@ write_node(SerdWriter* writer, sink("<", 1, writer); serd_uri_serialise(&abs_uri, (SerdSink)sink, writer); sink(">", 1, writer); - return true; + break; } sink("<", 1, writer); write_text(writer, WRITE_URI, node->buf, node->n_bytes, '>'); @@ -353,6 +420,7 @@ write_node(SerdWriter* writer, default: break; } + writer->last_sep = SEP_NONE; return true; } @@ -367,6 +435,45 @@ is_resource(const SerdNode* node) } } +static void +serd_writer_write_predicate(SerdWriter* writer, + SerdStatementFlags flags, + const SerdNode* predicate) +{ + if (!(flags & (SERD_LIST_CONT|SERD_LIST_S_BEGIN))) { + write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags); + write_sep(writer, SEP_P_O); + } + copy_node(&writer->context.predicate, predicate); +} + +static bool +serd_writer_write_object(SerdWriter* writer, + SerdStatementFlags flags, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang) +{ + if (!strcmp((const char*)object->buf, NS_RDF "nil")) { + if (flags & SERD_LIST_CONT) { + --writer->indent; + write_sep(writer, SEP_LIST_END); + return true; + } else { + sink("()", 2, writer); + } + } else if (strcmp((const char*)predicate->buf, NS_RDF "rest")) { + if (!strcmp((const char*)predicate->buf, NS_RDF "first")) { + write_sep(writer, SEP_LIST_SEP); + } + + write_node(writer, object, object_datatype, object_lang, + FIELD_OBJECT, flags); + } + return false; +} + SERD_API SerdStatus serd_writer_write_statement(SerdWriter* writer, @@ -399,15 +506,15 @@ serd_writer_write_statement(SerdWriter* writer, default: break; } + + bool was_list_end = false; if (serd_node_equals(subject, &writer->context.subject)) { if (serd_node_equals(predicate, &writer->context.predicate)) { // Abbreviate S P - if ((flags & SERD_ANON_O_BEGIN)) { - sink(" , ", 3, writer); // ] , [ - } else { + if (!(flags & SERD_ANON_O_BEGIN)) { ++writer->indent; - serd_writer_write_delim(writer, ','); } + write_sep(writer, SEP_END_O); write_node(writer, object, object_datatype, object_lang, FIELD_OBJECT, flags); if (!(flags & SERD_ANON_O_BEGIN)) { @@ -415,35 +522,34 @@ serd_writer_write_statement(SerdWriter* writer, } } else { // Abbreviate S - if (writer->context.predicate.type) { - serd_writer_write_delim(writer, ';'); - } else { - serd_writer_write_delim(writer, '\n'); + if (!(flags & SERD_LIST_CONT)) { + write_sep(writer, + (writer->context.predicate.type + ? SEP_END_P : SEP_S_P)); } - write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags); - copy_node(&writer->context.predicate, predicate); - sink(" ", 1, writer); - write_node(writer, object, object_datatype, object_lang, - FIELD_OBJECT, flags); + + serd_writer_write_predicate(writer, flags, predicate); + + was_list_end = serd_writer_write_object( + writer, flags, predicate, object, object_datatype, object_lang); } } else { + // No abbreviation if (writer->context.subject.type) { assert(writer->indent > 0); --writer->indent; - if (serd_stack_is_empty(&writer->anon_stack)) { - serd_writer_write_delim(writer, '.'); - serd_writer_write_delim(writer, '\n'); + if (serd_stack_is_empty(&writer->anon_stack) + && !(flags & SERD_LIST_CONT)) { + write_sep(writer, SEP_END_S); } } else if (!writer->empty) { - serd_writer_write_delim(writer, '\n'); + write_sep(writer, SEP_S_P); } - if (!(flags & SERD_ANON_CONT)) { + if (!(flags & (SERD_ANON_CONT|SERD_LIST_CONT))) { write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags); ++writer->indent; - if (!(flags & SERD_ANON_S_BEGIN)) { - serd_writer_write_delim(writer, '\n'); - } + write_sep(writer, SEP_S_P); } else { ++writer->indent; } @@ -451,15 +557,19 @@ serd_writer_write_statement(SerdWriter* writer, reset_context(writer, false); copy_node(&writer->context.subject, subject); - write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags); - copy_node(&writer->context.predicate, predicate); - sink(" ", 1, writer); + serd_writer_write_predicate(writer, flags, predicate); - write_node(writer, object, object_datatype, object_lang, - FIELD_OBJECT, flags); + was_list_end = serd_writer_write_object( + writer, flags, predicate, object, object_datatype, object_lang); } - if ((flags & SERD_ANON_S_BEGIN) || (flags & SERD_ANON_O_BEGIN)) { + if (was_list_end) { + if (--writer->list_depth == 0 && writer->list_subj.type) { + reset_context(writer, false); + writer->context.subject = writer->list_subj; + writer->list_subj = SERD_NODE_NULL; + } + } else if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) { WriteContext* ctx = (WriteContext*)serd_stack_push( &writer->anon_stack, sizeof(WriteContext)); *ctx = writer->context; @@ -490,8 +600,8 @@ serd_writer_end_anon(SerdWriter* writer, } assert(writer->indent > 0); --writer->indent; - serd_writer_write_delim(writer, '\n'); - sink("]", 1, writer); + write_sep(writer, SEP_END_P); + write_sep(writer, SEP_ANON_END); reset_context(writer, true); writer->context = *anon_stack_top(writer); serd_stack_pop(&writer->anon_stack, sizeof(WriteContext)); @@ -536,9 +646,12 @@ serd_writer_new(SerdSyntax syntax, writer->sink = sink; writer->stream = stream; writer->context = context; + writer->list_subj = SERD_NODE_NULL; + writer->list_depth = 0; writer->bprefix = NULL; writer->bprefix_len = 0; writer->indent = 0; + writer->last_sep = SEP_NONE; writer->empty = true; if (style & SERD_STYLE_BULK) { writer->bulk_sink = serd_bulk_sink_new(sink, stream, SERD_PAGE_SIZE); -- cgit v1.2.1