From 54f826be2f0eb0c8733e0b0ec033ce31903ff3fe Mon Sep 17 00:00:00 2001 From: David Robillard Date: Thu, 19 May 2011 19:21:04 +0000 Subject: Express inlining information via statement flags rather than node types. Correctly support inline blank subjects (e.g. "[ :a :b ] :c :d ."). git-svn-id: http://svn.drobilla.net/serd/trunk@184 490d8e77-9747-427b-9fa3-0b8f29cee8a0 --- serd/serd.h | 74 ++++++++++++++++---------------- src/reader.c | 90 ++++++++++++++++++++++----------------- src/writer.c | 137 +++++++++++++++++++++++++++++++---------------------------- 3 files changed, 159 insertions(+), 142 deletions(-) diff --git a/serd/serd.h b/serd/serd.h index 977c9e51..277096fe 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -108,18 +108,31 @@ typedef enum { SERD_NTRIPLES = 2 } SerdSyntax; +/** + Flags indication inline abbreviation information for a statement. +*/ +typedef enum { + SERD_EMPTY_S = 1 << 1, /**< Empty blank node subject */ + SERD_EMPTY_O = 1 << 2, /**< Empty blank node object */ + SERD_ANON_S_BEGIN = 1 << 3, /**< Start of anonymous subject */ + SERD_ANON_O_BEGIN = 1 << 4, /**< Start of anonymous object */ + SERD_ANON_CONT = 1 << 5, /**< Continuation of anonymous node */ + SERD_ANON_END = 1 << 6, /**< End of anonymous subject */ +} SerdStatementFlag; + +/** + Bitwise OR of SerdNodeFlag values. +*/ +typedef uint32_t SerdStatementFlags; + /** Type of a syntactic RDF node. This is more precise than the type of an abstract RDF node. An abstract node is either a resource, literal, or blank. In syntax there are two ways to refer to a resource (by URI or CURIE) and two ways to refer to a blank - (by ID or anonymously). - - Serd represents a node as a string "value" associated with a @ref SerdType, - which is precise enough to support streaming abbreviation. If abbreviation - is not applicable, @ref SERD_ANON_BEGIN and @ref SERD_ANON may simply be - considered equivalent to @ref SERD_BLANK_ID. + (by ID or anonymously). Anonymous (inline) blank nodes are expressed using + SerdStatementFlags rather than this type. */ typedef enum { /** @@ -156,30 +169,15 @@ typedef enum { SERD_CURIE = 3, /** - A blank node ID. + A blank node. Value is a blank node ID, e.g. "id3", which is meaningful only within this serialisation. @see Turtle nodeID */ - SERD_BLANK_ID = 4, - - /** - The first reference to an anonymous (inlined) blank node. - - Value is identical to a @ref SERD_BLANK_ID value (i.e. this type may be - safely considered equivalent to @ref SERD_BLANK_ID). - */ - SERD_ANON_BEGIN = 5, - - /** - An anonymous blank node. + SERD_BLANK = 4, - Value is identical to a @ref SERD_BLANK_ID value (i.e. this type may be - safely considered equivalent to @ref SERD_BLANK_ID). - */ - SERD_ANON = 6 } SerdType; /** @@ -399,13 +397,14 @@ typedef SerdStatus (*SerdPrefixSink)(void* handle, Called for every RDF statement in the serialisation. */ -typedef SerdStatus (*SerdStatementSink)(void* handle, - const SerdNode* graph, - const SerdNode* subject, - const SerdNode* predicate, - const SerdNode* object, - const SerdNode* object_datatype, - const SerdNode* object_lang); +typedef SerdStatus (*SerdStatementSink)(void* handle, + SerdStatementFlags flags, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang); /** Sink (callback) for anonymous node end markers. @@ -632,13 +631,14 @@ serd_writer_set_prefix(SerdWriter* writer, */ SERD_API SerdStatus -serd_writer_write_statement(SerdWriter* writer, - const SerdNode* graph, - const SerdNode* subject, - const SerdNode* predicate, - const SerdNode* object, - const SerdNode* object_datatype, - const SerdNode* object_lang); +serd_writer_write_statement(SerdWriter* writer, + SerdStatementFlags flags, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang); /** Mark the end of an anonymous node's description. diff --git a/src/reader.c b/src/reader.c index 12109ad3..e76e40e1 100644 --- a/src/reader.c +++ b/src/reader.c @@ -49,9 +49,10 @@ typedef struct { } Node; typedef struct { - const Node* graph; - const Node* subject; - const Node* predicate; + const Node* graph; + const Node* subject; + const Node* predicate; + SerdStatementFlags* flags; } ReadContext; /** Measured UTF-8 string. */ @@ -305,7 +306,7 @@ public_node(SerdReader* reader, const Node* private) } static inline bool -emit_statement(SerdReader* reader, +emit_statement(SerdReader* reader, SerdStatementFlags* flags, const Node* g, const Node* s, const Node* p, const Node* o, const Node* d, Ref l, uint32_t f) { @@ -318,17 +319,20 @@ emit_statement(SerdReader* reader, const SerdNode datatype = public_node(reader, d); const SerdNode lang = public_node_from_ref(reader, SERD_LITERAL, l); object.flags = f; - return !reader->statement_sink(reader->handle, - &graph, - &subject, - &predicate, - &object, - &datatype, - &lang); + bool ret = !reader->statement_sink(reader->handle, + *flags, + &graph, + &subject, + &predicate, + &object, + &datatype, + &lang); + *flags = (*flags & SERD_ANON_CONT) ? SERD_ANON_CONT : 0; + return ret; } static bool read_collection(SerdReader* reader, ReadContext ctx, Node* dest); -static bool read_predicateObjectList(SerdReader* reader, ReadContext ctx); +static bool read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool blank); // [40] hex ::= [#x30-#x39] | [#x41-#x46] static inline uint8_t @@ -1022,45 +1026,53 @@ blank_id(SerdReader* reader) // Impl: [21] blank ::= nodeID | '[ ws* ]' // | '[' ws* predicateObjectList ws* ']' | collection static bool -read_blank(SerdReader* reader, ReadContext ctx, Node* dest) +read_blank(SerdReader* reader, ReadContext ctx, bool subject, Node* dest) { + const bool was_anon_subject = (*ctx.flags | SERD_ANON_CONT); switch (peek_byte(reader)) { case '_': - *dest = make_node(SERD_BLANK_ID, read_nodeID(reader)); + *dest = make_node(SERD_BLANK, read_nodeID(reader)); return true; case '[': eat_byte(reader, '['); read_ws_star(reader); + *dest = make_node(SERD_BLANK, blank_id(reader)); if (peek_byte(reader) == ']') { eat_byte(reader, ']'); - *dest = make_node(SERD_BLANK_ID, blank_id(reader)); + *ctx.flags |= (subject) ? SERD_EMPTY_S : SERD_EMPTY_O; if (ctx.subject) { - TRY_RET(emit_statement(reader, + TRY_RET(emit_statement(reader, ctx.flags, ctx.graph, ctx.subject, ctx.predicate, dest, NULL, 0, 0)); } return true; } - *dest = make_node(SERD_ANON_BEGIN, blank_id(reader)); + + *ctx.flags |= (subject) ? SERD_ANON_S_BEGIN : SERD_ANON_O_BEGIN; if (ctx.subject) { - TRY_RET(emit_statement(reader, + TRY_RET(emit_statement(reader, ctx.flags, ctx.graph, ctx.subject, ctx.predicate, dest, NULL, 0, 0)); - dest->type = SERD_ANON; } ctx.subject = dest; - read_predicateObjectList(reader, ctx); + if (!subject) { + *ctx.flags |= SERD_ANON_CONT; + } + read_predicateObjectList(reader, ctx, true); read_ws_star(reader); eat_byte(reader, ']'); if (reader->end_sink) { const SerdNode end = public_node(reader, dest); reader->end_sink(reader->handle, &end); } + if (subject && !was_anon_subject) { + *ctx.flags &= ~SERD_ANON_CONT; + } return true; case '(': if (read_collection(reader, ctx, dest)) { if (ctx.subject) { - TRY_RET(emit_statement(reader, + TRY_RET(emit_statement(reader, ctx.flags, ctx.graph, ctx.subject, ctx.predicate, dest, NULL, 0, 0)); } @@ -1113,17 +1125,14 @@ read_object(SerdReader* reader, ReadContext ctx) emit = false; // fall through case '_': - TRY_THROW(ret = read_blank(reader, ctx, &o)); + TRY_THROW(ret = read_blank(reader, ctx, false, &o)); break; case '<': case ':': TRY_THROW(ret = read_resource(reader, &o)); break; case '\"': case '+': case '-': case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang, &flags)); - break; - case '.': + case '5': case '6': case '7': case '8': case '9': case '.': TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang, &flags)); break; default: @@ -1150,7 +1159,7 @@ read_object(SerdReader* reader, ReadContext ctx) if (ret && emit) { assert(o.value); - ret = emit_statement(reader, + ret = emit_statement(reader, ctx.flags, ctx.graph, ctx.subject, ctx.predicate, &o, &datatype, lang, flags); } @@ -1168,7 +1177,7 @@ except: // Spec: [8] objectList ::= object ( ',' object )* // Impl: [8] objectList ::= object ( ws* ',' ws* object )* static bool -read_objectList(SerdReader* reader, ReadContext ctx) +read_objectList(SerdReader* reader, ReadContext ctx, bool blank) { TRY_RET(read_object(reader, ctx)); read_ws_star(reader); @@ -1186,7 +1195,7 @@ read_objectList(SerdReader* reader, ReadContext ctx) // Impl: [7] predicateObjectList ::= verb ws+ objectList // (ws* ';' ws* verb ws+ objectList)* (';')? static bool -read_predicateObjectList(SerdReader* reader, ReadContext ctx) +read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool blank) { if (reader->eof) { return false; @@ -1195,7 +1204,7 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx) TRY_RET(read_verb(reader, &predicate)); TRY_THROW(read_ws_plus(reader)); ctx.predicate = &predicate; - TRY_THROW(read_objectList(reader, ctx)); + TRY_THROW(read_objectList(reader, ctx, blank)); pop_string(reader, predicate.value); predicate.value = 0; read_ws_star(reader); @@ -1209,7 +1218,7 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx) TRY_THROW(read_verb(reader, &predicate)); ctx.predicate = &predicate; TRY_THROW(read_ws_plus(reader)); - TRY_THROW(read_objectList(reader, ctx)); + TRY_THROW(read_objectList(reader, ctx, blank)); pop_string(reader, predicate.value); predicate.value = 0; read_ws_star(reader); @@ -1229,18 +1238,20 @@ read_collection_rec(SerdReader* reader, ReadContext ctx) read_ws_star(reader); if (peek_byte(reader) == ')') { eat_byte(reader, ')'); - TRY_RET(emit_statement(reader, NULL, + TRY_RET(emit_statement(reader, ctx.flags, + NULL, ctx.subject, &reader->rdf_rest, &reader->rdf_nil, NULL, 0, 0)); return false; } else { - const Node rest = make_node(SERD_BLANK_ID, blank_id(reader)); - TRY_RET(emit_statement(reader, ctx.graph, + const Node rest = make_node(SERD_BLANK, blank_id(reader)); + TRY_RET(emit_statement(reader, ctx.flags, + ctx.graph, ctx.subject, &reader->rdf_rest, &rest, NULL, 0, 0)); - ctx.subject = &rest; + ctx.subject = &rest; ctx.predicate = &reader->rdf_first; if (read_object(reader, ctx)) { read_collection_rec(reader, ctx); @@ -1266,7 +1277,7 @@ read_collection(SerdReader* reader, ReadContext ctx, Node* dest) return true; } - *dest = make_node(SERD_BLANK_ID, blank_id(reader)); + *dest = make_node(SERD_BLANK, blank_id(reader)); ctx.subject = dest; ctx.predicate = &reader->rdf_first; if (!read_object(reader, ctx)) { @@ -1284,7 +1295,7 @@ read_subject(SerdReader* reader, ReadContext ctx) Node subject = INTERNAL_NODE_NULL; switch (peek_byte(reader)) { case '[': case '(': case '_': - read_blank(reader, ctx, &subject); + read_blank(reader, ctx, true, &subject); break; default: read_resource(reader, &subject); @@ -1302,7 +1313,7 @@ read_triples(SerdReader* reader, ReadContext ctx) if (subject.value != 0) { ctx.subject = &subject; TRY_RET(read_ws_plus(reader)); - ret = read_predicateObjectList(reader, ctx); + ret = read_predicateObjectList(reader, ctx, false); pop_string(reader, subject.value); } ctx.subject = ctx.predicate = 0; @@ -1370,7 +1381,8 @@ read_directive(SerdReader* reader) static bool read_statement(SerdReader* reader) { - ReadContext ctx = { 0, 0, 0 }; + SerdStatementFlags flags = 0; + ReadContext ctx = { 0, 0, 0, &flags }; read_ws_star(reader); if (reader->eof) { return true; diff --git a/src/writer.c b/src/writer.c index fafde07c..550337a7 100644 --- a/src/writer.c +++ b/src/writer.c @@ -184,46 +184,47 @@ reset_context(SerdWriter* writer) writer->empty = false; } +typedef enum { + FIELD_NONE, + FIELD_SUBJECT, + FIELD_PREDICATE, + FIELD_OBJECT +} Field; + static bool -write_node(SerdWriter* writer, - const SerdNode* node, - const SerdNode* datatype, - const SerdNode* lang, - bool is_predicate) +write_node(SerdWriter* writer, + const SerdNode* node, + const SerdNode* datatype, + const SerdNode* lang, + Field field, + SerdStatementFlags flags) { SerdChunk uri_prefix; SerdChunk uri_suffix; switch (node->type) { case SERD_NOTHING: return false; - case SERD_ANON_BEGIN: - if (writer->syntax != SERD_NTRIPLES) { + case SERD_BLANK: + if (writer->syntax != SERD_NTRIPLES + && ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) + || (field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN)))) { ++writer->indent; serd_writer_write_delim(writer, '['); - WriteContext* ctx = (WriteContext*)serd_stack_push( - &writer->anon_stack, sizeof(WriteContext)); - ctx->graph = serd_node_copy(&writer->context.graph); - ctx->subject = serd_node_copy(&writer->context.subject); - ctx->predicate = serd_node_copy(&writer->context.predicate); - ctx->object = serd_node_copy(&writer->context.object); - reset_context(writer); - writer->context.subject = serd_node_copy(node); - break; - } - case SERD_ANON: - if (writer->syntax != SERD_NTRIPLES) { - break; - } // else fall through - case SERD_BLANK_ID: - writer->sink("_:", 2, writer->stream); - if (writer->bprefix - && !strncmp((const char*)node->buf, (const char*)writer->bprefix, - writer->bprefix_len)) { - writer->sink(node->buf + writer->bprefix_len, - node->n_bytes - writer->bprefix_len, - writer->stream); + } else if (writer->syntax != SERD_NTRIPLES + && ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S)) + || (field == FIELD_OBJECT && (flags & SERD_EMPTY_O)))) { + writer->sink("[]", 2, writer->stream); } else { - writer->sink(node->buf, node->n_bytes, writer->stream); + writer->sink("_:", 2, writer->stream); + if (writer->bprefix + && !strncmp((const char*)node->buf, (const char*)writer->bprefix, + writer->bprefix_len)) { + writer->sink(node->buf + writer->bprefix_len, + node->n_bytes - writer->bprefix_len, + writer->stream); + } else { + writer->sink(node->buf, node->n_bytes, writer->stream); + } } break; case SERD_CURIE: @@ -268,7 +269,7 @@ write_node(SerdWriter* writer, writer->sink(lang->buf, lang->n_bytes, writer->stream); } else if (datatype && datatype->buf) { writer->sink("^^", 2, writer->stream); - write_node(writer, datatype, NULL, NULL, false); + write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags); } break; case SERD_URI: @@ -308,22 +309,24 @@ write_node(SerdWriter* writer, SERD_API SerdStatus -serd_writer_write_statement(SerdWriter* writer, - const SerdNode* graph, - const SerdNode* subject, - const SerdNode* predicate, - const SerdNode* object, - const SerdNode* object_datatype, - const SerdNode* object_lang) +serd_writer_write_statement(SerdWriter* writer, + SerdStatementFlags flags, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang) { assert(subject && predicate && object); switch (writer->syntax) { case SERD_NTRIPLES: - write_node(writer, subject, NULL, NULL, false); + write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags); writer->sink(" ", 1, writer->stream); - write_node(writer, predicate, NULL, NULL, true); + write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags); writer->sink(" ", 1, writer->stream); - if (!write_node(writer, object, object_datatype, object_lang, false)) { + if (!write_node(writer, object, object_datatype, object_lang, + FIELD_OBJECT, flags)) { return SERD_ERR_UNKNOWN; } writer->sink(" .\n", 3, writer->stream); @@ -334,24 +337,19 @@ serd_writer_write_statement(SerdWriter* writer, if (serd_node_equals(subject, &writer->context.subject)) { if (serd_node_equals(predicate, &writer->context.predicate)) { // Abbreviate S P - if (writer->context.object.type == SERD_ANON_BEGIN) { - writer->sink(" , ", 3, writer->stream); - write_node(writer, object, object_datatype, object_lang, false); - } else { - ++writer->indent; - serd_writer_write_delim(writer, ','); - write_node(writer, object, object_datatype, object_lang, false); - --writer->indent; - } + ++writer->indent; + serd_writer_write_delim(writer, ','); + write_node(writer, object, object_datatype, object_lang, + FIELD_OBJECT, flags); + --writer->indent; } else { // Abbreviate S if (writer->context.predicate.buf) { serd_writer_write_delim(writer, ';'); } else { - ++writer->indent; serd_writer_write_delim(writer, '\n'); } - write_node(writer, predicate, NULL, NULL, true); + write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags); if (writer->context.predicate.buf) serd_node_free(&writer->context.predicate); writer->context.predicate = serd_node_copy(predicate); @@ -359,7 +357,8 @@ serd_writer_write_statement(SerdWriter* writer, serd_node_free(&writer->context.object); writer->context.object = serd_node_copy(object); writer->sink(" ", 1, writer->stream); - write_node(writer, object, object_datatype, object_lang, false); + write_node(writer, object, object_datatype, object_lang, + FIELD_OBJECT, flags); } } else { if (writer->context.subject.buf) { @@ -374,29 +373,33 @@ serd_writer_write_statement(SerdWriter* writer, serd_writer_write_delim(writer, '\n'); } - if (subject->type == SERD_ANON_BEGIN) { - writer->sink("[ ", 2, writer->stream); - ++writer->indent; - WriteContext* ctx = (WriteContext*)serd_stack_push( - &writer->anon_stack, sizeof(WriteContext)); - *ctx = writer->context; - } else { - write_node(writer, subject, NULL, NULL, false); + if (!(flags & SERD_ANON_CONT)) { + write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags); ++writer->indent; - if (subject->type != SERD_ANON_BEGIN && subject->type != SERD_ANON) { + if (!(flags & SERD_ANON_S_BEGIN)) { serd_writer_write_delim(writer, '\n'); } + } else { + ++writer->indent; } reset_context(writer); writer->context.subject = serd_node_copy(subject); writer->context.predicate = SERD_NODE_NULL; - write_node(writer, predicate, NULL, NULL, true); + write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags); writer->context.predicate = serd_node_copy(predicate); writer->sink(" ", 1, writer->stream); - write_node(writer, object, object_datatype, object_lang, false); + write_node(writer, object, object_datatype, object_lang, + FIELD_OBJECT, flags); + } + + if (writer->syntax != SERD_NTRIPLES + && ((flags & SERD_ANON_S_BEGIN) || (flags & SERD_ANON_O_BEGIN))) { + WriteContext* ctx = (WriteContext*)serd_stack_push( + &writer->anon_stack, sizeof(WriteContext)); + *ctx = writer->context; } const WriteContext new_context = { serd_node_copy(graph), @@ -424,11 +427,13 @@ serd_writer_end_anon(SerdWriter* writer, --writer->indent; serd_writer_write_delim(writer, '\n'); writer->sink("]", 1, writer->stream); + const bool is_subject = serd_node_equals(node, &writer->context.subject); reset_context(writer); writer->context = *anon_stack_top(writer); serd_stack_pop(&writer->anon_stack, sizeof(WriteContext)); - if (!writer->context.subject.buf) { // End of anonymous subject - writer->context.subject = serd_node_copy(node); + if (is_subject) { + writer->context.subject = serd_node_copy(node); + writer->context.predicate = SERD_NODE_NULL; } return SERD_SUCCESS; } -- cgit v1.2.1