about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2018-10-21 20:57:33 +0200
committer David Robillard <d@drobilla.net> 2018-11-25 22:12:47 +0100
commit 505209a2ff35c2b880cc64712932fd4d11aeaf8d (patch)
tree 32a4a3536b699e1f02dba1379fcd69f78ab4eb20 /src
parent ea4ade33a6d6ed11d870600b9e54a3f8e34ddb29 (diff)
download serd-505209a2ff35c2b880cc64712932fd4d11aeaf8d.tar.gz
serd-505209a2ff35c2b880cc64712932fd4d11aeaf8d.tar.bz2
serd-505209a2ff35c2b880cc64712932fd4d11aeaf8d.zip
Simplify streaming API and improve pretty printing
This removes the obligation from the caller to correctly maintain flags to describe the current anonymous context, instead making the writer handle this itself as much as possible. Flags remain for the cases the writer cannot infer from context: the start of anonymous subject and object nodes.
Diffstat (limited to 'src')
-rw-r--r-- src/n3.c 17
-rw-r--r-- src/reader.c 2
-rw-r--r-- src/writer.c 258
3 files changed, 157 insertions, 120 deletions
diff --git a/src/n3.c b/src/n3.c
index bce9e01d..8a45c079 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -937,7 +937,7 @@ read_anon(SerdReader* reader, ReadContext ctx, bool subject, SerdNode** dest)
bool empty;
eat_byte_safe(reader, '[');
if ((empty = peek_delim(reader, ']'))) {
- *ctx.flags |= (subject) ? SERD_EMPTY_S : SERD_EMPTY_O;
+ *ctx.flags |= (subject) ? SERD_EMPTY_S : SERD_ANON_O_BEGIN;
} else {
*ctx.flags |= (subject) ? SERD_ANON_S_BEGIN : SERD_ANON_O_BEGIN;
if (peek_delim(reader, '=')) {
@@ -959,21 +959,19 @@ read_anon(SerdReader* reader, ReadContext ctx, bool subject, SerdNode** dest)
ctx.subject = *dest;
if (!empty) {
- *ctx.flags &= ~(SERD_LIST_CONT);
- if (!subject) {
- *ctx.flags |= SERD_ANON_CONT;
- }
bool ate_dot_in_list = false;
read_predicateObjectList(reader, ctx, &ate_dot_in_list);
if (ate_dot_in_list) {
return r_err(reader, SERD_ERR_BAD_SYNTAX, "`.' inside blank\n");
}
read_ws_star(reader);
- if (reader->sink->end) {
- reader->sink->end(reader->sink->handle, *dest);
- }
*ctx.flags = old_flags;
}
+
+ if (reader->sink->end && (!subject || !empty)) {
+ reader->sink->end(reader->sink->handle, *dest);
+ }
+
return (eat_byte_check(reader, ']') == ']') ? SERD_SUCCESS
: SERD_ERR_BAD_SYNTAX;
}
@@ -1130,7 +1128,6 @@ end_collection(SerdReader* reader,
SerdNode* n2,
SerdStatus st)
{
- *ctx.flags &= ~SERD_LIST_CONT;
if (!st) {
eat_byte_safe(reader, ')');
}
@@ -1148,7 +1145,6 @@ read_collection(SerdReader* reader, ReadContext ctx, SerdNode** dest)
// subject predicate _:head
*ctx.flags |= (end ? 0 : SERD_LIST_O_BEGIN);
TRY(st, emit_statement(reader, ctx, *dest));
- *ctx.flags |= SERD_LIST_CONT;
} else {
*ctx.flags |= (end ? 0 : SERD_LIST_S_BEGIN);
}
@@ -1188,7 +1184,6 @@ read_collection(SerdReader* reader, ReadContext ctx, SerdNode** dest)
}
// _:node rdf:rest _:rest
- *ctx.flags |= SERD_LIST_CONT;
ctx.predicate = reader->rdf_rest;
TRY(st, emit_statement(reader, ctx, (end ? reader->rdf_nil : rest)));
diff --git a/src/reader.c b/src/reader.c
index 94387171..e286fd4e 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -128,7 +128,7 @@ emit_statement(SerdReader* reader, ReadContext ctx, SerdNode* o)
const SerdStatus st = reader->sink->statement(
reader->sink->handle, *ctx.flags, &statement);
- *ctx.flags &= SERD_ANON_CONT|SERD_LIST_CONT; // Preserve only cont flags
+ *ctx.flags = 0;
return st;
}
diff --git a/src/writer.c b/src/writer.c
index 6a98147c..7b00eaa9 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -30,57 +30,72 @@
#include <stdlib.h>
#include <string.h>
-typedef struct {
- SerdNode* graph;
- SerdNode* subject;
- SerdNode* predicate;
+typedef enum {
+ CTX_NAMED, ///< Normal non-anonymous context
+ CTX_BLANK, ///< Anonymous blank node
+ CTX_LIST ///< Anonymous list
+} ContextType;
+
+typedef struct
+{
+ ContextType type;
+ SerdNode* graph;
+ SerdNode* subject;
+ SerdNode* predicate;
} WriteContext;
-static const WriteContext WRITE_CONTEXT_NULL = { NULL, NULL, NULL };
+static const WriteContext WRITE_CONTEXT_NULL = { CTX_NAMED, NULL, NULL, NULL };
typedef enum {
SEP_NONE,
+ SEP_NODE, ///< Placeholder for nodes
SEP_END_S, ///< End of a subject ('.')
SEP_END_P, ///< End of a predicate (';')
SEP_END_O, ///< End of an object (',')
SEP_S_P, ///< Between a subject and predicate (whitespace)
SEP_P_O, ///< Between a predicate and object (whitespace)
SEP_ANON_BEGIN, ///< Start of anonymous node ('[')
+ SEP_ANON_S_P, ///< Between start of anonymous node and predicate
SEP_ANON_END, ///< End of anonymous node (']')
SEP_LIST_BEGIN, ///< Start of list ('(')
SEP_LIST_SEP, ///< List separator (whitespace)
SEP_LIST_END, ///< End of list (')')
SEP_GRAPH_BEGIN, ///< Start of graph ('{')
SEP_GRAPH_END, ///< End of graph ('}')
- SEP_URI_BEGIN, ///< URI start quote ('<')
- SEP_URI_END ///< URI end quote ('>')
} Sep;
+typedef uint32_t SepMask; ///< Bitfield of separator flags
+
+#define SEP_ALL ((SepMask)-1)
+#define M(s) (1U << (s))
+
typedef struct {
- const char* str; ///< Sep string
- uint8_t len; ///< Length of sep string
- uint8_t space_before; ///< Newline before sep
- uint8_t space_after_node; ///< Newline after sep if after node
- uint8_t space_after_sep; ///< Newline after sep if after sep
+ const char* str; ///< Sep string
+ size_t len; ///< Length of sep string
+ int indent; ///< Indent delta
+ SepMask pre_space_after; ///< Leading space if after given seps
+ SepMask pre_line_after; ///< Leading newline if after given seps
+ SepMask post_line_after; ///< Trailing newline if after given seps
} SepRule;
static const SepRule rules[] = {
- { NULL, 0, 0, 0, 0 },
- { " .\n\n", 4, 0, 0, 0 },
- { " ;", 2, 0, 1, 1 },
- { " ,", 2, 0, 1, 0 },
- { NULL, 0, 0, 1, 0 },
- { " ", 1, 0, 0, 0 },
- { "[", 1, 0, 1, 1 },
- { "]", 1, 1, 0, 0 },
- { "(", 1, 0, 0, 0 },
- { NULL, 1, 0, 1, 0 },
- { ")", 1, 1, 0, 0 },
- { " {", 2, 0, 1, 1 },
- { " }", 2, 0, 1, 1 },
- { "<", 1, 0, 0, 0 },
- { ">", 1, 0, 0, 0 },
- { "\n", 1, 0, 1, 0 }
+ {"", 0, +0, SEP_NONE, SEP_NONE, SEP_NONE},
+ {"", 0, +0, SEP_NONE, SEP_NONE, SEP_NONE},
+ {".\n", 2, -1, SEP_ALL, SEP_NONE, SEP_NONE},
+ {";", 1, +0, SEP_ALL, SEP_NONE, SEP_ALL},
+ {",", 1, +0, SEP_ALL, SEP_NONE, ~(M(SEP_ANON_END) | M(SEP_LIST_END))},
+ {"", 0, +1, SEP_NONE, SEP_NONE, SEP_ALL},
+ {" ", 1, +0, SEP_NONE, SEP_NONE, SEP_NONE},
+ {"[", 1, +1, M(SEP_END_O), SEP_NONE, SEP_NONE},
+ {"", 0, +0, SEP_NONE, SEP_ALL, SEP_NONE},
+ {"]", 1, -1, SEP_NONE, ~M(SEP_ANON_BEGIN), SEP_NONE},
+ {"(", 1, +1, M(SEP_END_O), SEP_NONE, SEP_ALL},
+ {"", 0, +0, SEP_NONE, SEP_ALL, SEP_NONE},
+ {")", 1, -1, SEP_NONE, SEP_ALL, SEP_NONE},
+ {"{", 1, +1, SEP_ALL, SEP_NONE, SEP_NONE},
+ {"}", 1, -1, SEP_NONE, SEP_NONE, SEP_ALL},
+ {"<", 1, +0, SEP_NONE, SEP_NONE, SEP_NONE},
+ {">", 1, +0, SEP_NONE, SEP_NONE, SEP_NONE},
};
struct SerdWriterImpl {
@@ -97,8 +112,6 @@ struct SerdWriterImpl {
SerdErrorSink error_sink;
void* error_handle;
WriteContext context;
- SerdNode* list_subj;
- unsigned list_depth;
unsigned indent;
char* bprefix;
size_t bprefix_len;
@@ -375,18 +388,28 @@ static bool
write_sep(SerdWriter* writer, const Sep sep)
{
const SepRule* rule = &rules[sep];
- if (rule->space_before) {
+
+ // Adjust indent, but tolerate if it would become negative
+ writer->indent =
+ ((rule->indent >= 0 || writer->indent >= (unsigned)-rule->indent)
+ ? writer->indent + rule->indent
+ : 0);
+
+ // Write newline or space before separator if necessary
+ if (rule->pre_line_after & (1 << writer->last_sep)) {
write_newline(writer);
+ } else if (rule->pre_space_after & (1 << writer->last_sep)) {
+ sink(" ", 1, writer);
}
- if (rule->str) {
- sink(rule->str, rule->len, writer);
- }
- if ((writer->last_sep && rule->space_after_sep) ||
- (!writer->last_sep && rule->space_after_node)) {
+
+ // Write actual separator string
+ sink(rule->str, rule->len, writer);
+
+ // Write newline after separator if necessary
+ if (rule->post_line_after & (1 << writer->last_sep)) {
write_newline(writer);
- } else if (writer->last_sep && rule->space_after_node) {
- sink(" ", 1, writer);
}
+
writer->last_sep = sep;
return true;
}
@@ -432,6 +455,8 @@ write_literal(SerdWriter* writer,
SerdField field,
SerdStatementFlags flags)
{
+ writer->last_sep = SEP_NONE;
+
const SerdNode* datatype = serd_node_get_datatype(node);
const SerdNode* lang = serd_node_get_language(node);
const char* node_str = serd_node_get_string(node);
@@ -492,19 +517,18 @@ write_uri_node(SerdWriter* const writer,
const SerdField field,
const SerdStatementFlags flags)
{
- const SerdNode* prefix;
- SerdSlice suffix;
-
+ writer->last_sep = SEP_NONE;
if (is_inline_start(writer, field, flags)) {
- ++writer->indent;
write_sep(writer, SEP_ANON_BEGIN);
- sink("== ", 3, writer);
+ sink(" == ", 4, writer);
}
- const char* node_str = serd_node_get_string(node);
- const bool has_scheme = serd_uri_string_has_scheme(node_str);
- if (field == SERD_PREDICATE && supports_abbrev(writer)
- && serd_node_equals(node, writer->world->rdf_type)) {
+ const SerdNode* prefix;
+ SerdSlice suffix;
+ const char* node_str = serd_node_get_string(node);
+ const bool has_scheme = serd_uri_string_has_scheme(node_str);
+ if (field == SERD_PREDICATE && supports_abbrev(writer) &&
+ serd_node_equals(node, writer->world->rdf_type)) {
return sink("a", 1, writer) == 1;
} else if (supports_abbrev(writer) &&
serd_node_equals(node, writer->world->rdf_nil)) {
@@ -518,7 +542,7 @@ write_uri_node(SerdWriter* const writer,
return true;
}
- write_sep(writer, SEP_URI_BEGIN);
+ sink("<", 1, writer);
if (serd_env_get_base_uri(writer->env)) {
const SerdURI* base_uri = serd_env_get_parsed_base_uri(writer->env);
SerdURI uri, abs_uri;
@@ -536,10 +560,10 @@ write_uri_node(SerdWriter* const writer,
} else {
write_uri_from_node(writer, node);
}
- write_sep(writer, SEP_URI_END);
+ sink(">", 1, writer);
+ writer->last_sep = SEP_NONE;
if (is_inline_start(writer, field, flags)) {
sink(" ;", 2, writer);
- write_newline(writer);
}
return true;
}
@@ -550,7 +574,7 @@ write_curie(SerdWriter* const writer,
const SerdField field,
const SerdStatementFlags flags)
{
- const char* node_str = serd_node_get_string(node);
+ writer->last_sep = SEP_NONE;
SerdSlice prefix;
SerdSlice suffix;
@@ -563,25 +587,23 @@ write_curie(SerdWriter* const writer,
serd_world_errorf(writer->world,
st,
"undefined namespace prefix `%s'\n",
- node_str);
+ serd_node_get_string(node));
return false;
}
- write_sep(writer, SEP_URI_BEGIN);
+ sink("<", 1, writer);
write_uri(writer, prefix.buf, prefix.len);
write_uri(writer, suffix.buf, suffix.len);
- write_sep(writer, SEP_URI_END);
+ sink(">", 1, writer);
break;
case SERD_TURTLE:
case SERD_TRIG:
if (is_inline_start(writer, field, flags)) {
- ++writer->indent;
write_sep(writer, SEP_ANON_BEGIN);
- sink("== ", 3, writer);
+ sink(" == ", 4, writer);
}
- write_lname(writer, node_str, node->n_bytes);
+ write_lname(writer, serd_node_get_string(node), node->n_bytes);
if (is_inline_start(writer, field, flags)) {
sink(" ;", 2, writer);
- write_newline(writer);
}
}
return true;
@@ -596,20 +618,14 @@ write_blank(SerdWriter* const writer,
const char* node_str = serd_node_get_string(node);
if (supports_abbrev(writer)) {
if (is_inline_start(writer, field, flags)) {
- ++writer->indent;
return write_sep(writer, SEP_ANON_BEGIN);
- } else if (field == SERD_SUBJECT && (flags & SERD_LIST_S_BEGIN)) {
- assert(writer->list_depth == 0);
- serd_node_set(&writer->list_subj, node);
- ++writer->list_depth;
- ++writer->indent;
+ } else if ((field == SERD_SUBJECT && (flags & SERD_LIST_S_BEGIN)) ||
+ (field == SERD_OBJECT && (flags & SERD_LIST_O_BEGIN))) {
return write_sep(writer, SEP_LIST_BEGIN);
- } else if (field == SERD_OBJECT && (flags & SERD_LIST_O_BEGIN)) {
- ++writer->indent;
- ++writer->list_depth;
- return write_sep(writer, SEP_LIST_BEGIN);
- } else if ((field == SERD_SUBJECT && (flags & SERD_EMPTY_S)) ||
- (field == SERD_OBJECT && (flags & SERD_EMPTY_O))) {
+ } else if (field == SERD_SUBJECT && (flags & SERD_EMPTY_S)) {
+ /* Last character is technically a separator, but reset because we
+ want to treat "[]" like a node. */
+ writer->last_sep = SEP_NONE;
return sink("[]", 2, writer) == 2;
}
}
@@ -624,6 +640,7 @@ write_blank(SerdWriter* const writer,
sink(node_str, node->n_bytes, writer);
}
+ writer->last_sep = SEP_NONE;
return true;
}
@@ -648,7 +665,6 @@ write_node(SerdWriter* writer,
ret = write_blank(writer, node, field, flags);
default: break;
}
- writer->last_sep = SEP_NONE;
return ret;
}
@@ -669,17 +685,27 @@ write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred)
static bool
write_list_obj(SerdWriter* writer,
SerdStatementFlags flags,
+ const SerdNode* subject,
const SerdNode* predicate,
const SerdNode* object)
{
if (serd_node_equals(object, writer->world->rdf_nil)) {
- --writer->indent;
write_sep(writer, SEP_LIST_END);
return true;
- } else if (serd_node_equals(predicate, writer->world->rdf_first)) {
+ }
+
+ if (serd_node_equals(predicate, writer->world->rdf_rest) &&
+ !serd_node_equals(subject, ctx(writer, SERD_SUBJECT))) {
+ // Element following a nested list
write_sep(writer, SEP_LIST_SEP);
+ }
+
+ if (serd_node_equals(predicate, writer->world->rdf_first)) {
write_node(writer, object, SERD_OBJECT, flags);
+ } else if (serd_node_equals(subject, ctx(writer, SERD_SUBJECT))) {
+ write_sep(writer, SEP_LIST_SEP);
}
+
return false;
}
@@ -723,45 +749,39 @@ serd_writer_write_statement(SerdWriter* writer,
if ((graph && !serd_node_equals(graph, writer->context.graph)) ||
(!graph && ctx(writer, SERD_GRAPH))) {
- writer->indent = 0;
if (ctx(writer, SERD_SUBJECT)) {
write_sep(writer, SEP_END_S);
}
if (ctx(writer, SERD_GRAPH)) {
write_sep(writer, SEP_GRAPH_END);
}
+ if (!writer->empty) {
+ write_newline(writer); // Blank line between top level items
+ }
reset_context(writer, true);
if (graph) {
TRY(write_node(writer, graph, SERD_GRAPH, flags));
- ++writer->indent;
write_sep(writer, SEP_GRAPH_BEGIN);
serd_node_set(&writer->context.graph, graph);
}
}
- if ((flags & SERD_LIST_CONT)) {
- if (write_list_obj(writer, flags, predicate, object)) {
+ if (writer->context.type == CTX_LIST) {
+ if (write_list_obj(writer, flags, subject, predicate, object)) {
// Reached end of list
- if (--writer->list_depth == 0 && writer->list_subj) {
- reset_context(writer, false);
- serd_node_free(writer->context.subject);
- writer->context.subject = writer->list_subj;
- writer->list_subj = NULL;
- }
+ free_context(writer);
+ writer->context = *anon_stack_top(writer);
+ serd_stack_pop(&writer->anon_stack, sizeof(WriteContext));
return SERD_SUCCESS;
}
} else if (serd_node_equals(subject, writer->context.subject)) {
if (serd_node_equals(predicate, writer->context.predicate)) {
// Abbreviate S P
- if (!(flags & SERD_ANON_O_BEGIN)) {
- ++writer->indent;
- }
+ ++writer->indent;
write_sep(writer, SEP_END_O);
+ --writer->indent;
write_node(writer, object, SERD_OBJECT, flags);
- if (!(flags & SERD_ANON_O_BEGIN)) {
- --writer->indent;
- }
} else {
// Abbreviate S
Sep sep = ctx(writer, SERD_PREDICATE) ? SEP_END_P : SEP_S_P;
@@ -771,22 +791,24 @@ serd_writer_write_statement(SerdWriter* writer,
}
} else {
// No abbreviation
- if (ctx(writer, SERD_SUBJECT)) {
- assert(writer->indent > 0);
- --writer->indent;
- if (serd_stack_is_empty(&writer->anon_stack)) {
- write_sep(writer, SEP_END_S);
+ if (serd_stack_is_empty(&writer->anon_stack)) {
+ if (ctx(writer, SERD_SUBJECT)) {
+ write_sep(writer, SEP_END_S); // Terminate last subject
+ }
+ if (!writer->empty) {
+ write_newline(writer); // Blank line between top level items
}
- } else if (!writer->empty) {
- write_sep(writer, SEP_S_P);
}
- if (!(flags & SERD_ANON_CONT)) {
+ if (serd_stack_is_empty(&writer->anon_stack)) {
write_node(writer, subject, SERD_SUBJECT, flags);
- ++writer->indent;
- write_sep(writer, SEP_S_P);
+ if (!(flags & (SERD_ANON_S_BEGIN | SERD_LIST_S_BEGIN))) {
+ write_sep(writer, SEP_S_P);
+ } else if (flags & SERD_ANON_S_BEGIN) {
+ write_sep(writer, SEP_ANON_S_P);
+ }
} else {
- ++writer->indent;
+ write_sep(writer, SEP_ANON_S_P);
}
reset_context(writer, false);
@@ -799,11 +821,33 @@ serd_writer_write_statement(SerdWriter* writer,
write_node(writer, object, SERD_OBJECT, flags);
}
- if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) {
+ if (flags & (SERD_LIST_S_BEGIN)) {
WriteContext* ctx = (WriteContext*)serd_stack_push(
&writer->anon_stack, sizeof(WriteContext));
*ctx = writer->context;
WriteContext new_context = {
+ CTX_LIST,
+ serd_node_copy(graph), serd_node_copy(subject), NULL };
+ writer->context = new_context;
+ }
+
+ if (flags & (SERD_LIST_O_BEGIN)) {
+ WriteContext* ctx = (WriteContext*)serd_stack_push(
+ &writer->anon_stack, sizeof(WriteContext));
+ *ctx = writer->context;
+ WriteContext new_context = {
+ CTX_LIST,
+ serd_node_copy(graph), serd_node_copy(object), NULL };
+ writer->context = new_context;
+ }
+
+ if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) {
+ WriteContext* ctx = (WriteContext*)serd_stack_push(
+ &writer->anon_stack, sizeof(WriteContext));
+ *ctx = writer->context;
+ WriteContext new_context = {
+ (flags & (SERD_LIST_S_BEGIN|SERD_LIST_O_BEGIN))
+ ? CTX_LIST : CTX_BLANK,
serd_node_copy(graph), serd_node_copy(subject), NULL };
if ((flags & SERD_ANON_S_BEGIN)) {
new_context.predicate = serd_node_copy(predicate);
@@ -824,19 +868,18 @@ serd_writer_end_anon(SerdWriter* writer,
{
if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) {
return SERD_SUCCESS;
- }
- if (serd_stack_is_empty(&writer->anon_stack) || writer->indent == 0) {
+ } else if (serd_stack_is_empty(&writer->anon_stack)) {
return serd_world_errorf(writer->world, SERD_ERR_UNKNOWN,
"unexpected end of anonymous node\n");
}
- --writer->indent;
+
write_sep(writer, SEP_ANON_END);
free_context(writer);
writer->context = *anon_stack_top(writer);
serd_stack_pop(&writer->anon_stack, sizeof(WriteContext));
- const bool is_subject = serd_node_equals(node, writer->context.subject);
- if (is_subject) {
- serd_node_set(&writer->context.subject, node);
+
+ if (serd_node_equals(node, writer->context.subject)) {
+ // Now-finished anonymous node is the new subject with no other context
writer->context.predicate->type = SERD_NOTHING;
}
return SERD_SUCCESS;
@@ -877,7 +920,6 @@ serd_writer_new(SerdWorld* world,
writer->write_func = write_func;
writer->stream = stream;
writer->context = context;
- writer->list_subj = NULL;
writer->empty = true;
writer->iface.handle = writer;