aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2011-05-19 19:21:04 +0000
committerDavid Robillard <d@drobilla.net>2011-05-19 19:21:04 +0000
commit54f826be2f0eb0c8733e0b0ec033ce31903ff3fe (patch)
tree229fa8b771a4057be33ec96ff75533eb946e1127
parentb92d598a22fdad8c96a1167362d4bb79015af006 (diff)
downloadserd-54f826be2f0eb0c8733e0b0ec033ce31903ff3fe.tar.gz
serd-54f826be2f0eb0c8733e0b0ec033ce31903ff3fe.tar.bz2
serd-54f826be2f0eb0c8733e0b0ec033ce31903ff3fe.zip
Express inlining information via statement flags rather than node types.
Correctly support inline blank subjects (e.g. "[ :a :b ] :c :d ."). git-svn-id: http://svn.drobilla.net/serd/trunk@184 490d8e77-9747-427b-9fa3-0b8f29cee8a0
-rw-r--r--serd/serd.h74
-rw-r--r--src/reader.c90
-rw-r--r--src/writer.c137
3 files changed, 159 insertions, 142 deletions
diff --git a/serd/serd.h b/serd/serd.h
index 977c9e51..277096fe 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -109,17 +109,30 @@ typedef enum {
} SerdSyntax;
/**
+ Flags indicating inline abbreviation information for a statement.
+*/
+typedef enum {
+ SERD_EMPTY_S = 1 << 1, /**< Empty blank node subject */
+ SERD_EMPTY_O = 1 << 2, /**< Empty blank node object */
+ SERD_ANON_S_BEGIN = 1 << 3, /**< Start of anonymous subject */
+ SERD_ANON_O_BEGIN = 1 << 4, /**< Start of anonymous object */
+ SERD_ANON_CONT = 1 << 5, /**< Continuation of anonymous node */
+ SERD_ANON_END = 1 << 6, /**< End of anonymous subject */
+} SerdStatementFlag;
+
+/**
+ Bitwise OR of SerdStatementFlag values.
+*/
+typedef uint32_t SerdStatementFlags;
+
+/**
Type of a syntactic RDF node.
This is more precise than the type of an abstract RDF node. An abstract
node is either a resource, literal, or blank. In syntax there are two ways
to refer to a resource (by URI or CURIE) and two ways to refer to a blank
- (by ID or anonymously).
-
- Serd represents a node as a string "value" associated with a @ref SerdType,
- which is precise enough to support streaming abbreviation. If abbreviation
- is not applicable, @ref SERD_ANON_BEGIN and @ref SERD_ANON may simply be
- considered equivalent to @ref SERD_BLANK_ID.
+ (by ID or anonymously). Anonymous (inline) blank nodes are expressed using
+ SerdStatementFlags rather than this type.
*/
typedef enum {
/**
@@ -156,30 +169,15 @@ typedef enum {
SERD_CURIE = 3,
/**
- A blank node ID.
+ A blank node.
Value is a blank node ID, e.g. "id3", which is meaningful only within
this serialisation.
@see <a href="http://www.w3.org/TeamSubmission/turtle#nodeID">Turtle
<tt>nodeID</tt></a>
*/
- SERD_BLANK_ID = 4,
-
- /**
- The first reference to an anonymous (inlined) blank node.
-
- Value is identical to a @ref SERD_BLANK_ID value (i.e. this type may be
- safely considered equivalent to @ref SERD_BLANK_ID).
- */
- SERD_ANON_BEGIN = 5,
-
- /**
- An anonymous blank node.
+ SERD_BLANK = 4,
- Value is identical to a @ref SERD_BLANK_ID value (i.e. this type may be
- safely considered equivalent to @ref SERD_BLANK_ID).
- */
- SERD_ANON = 6
} SerdType;
/**
@@ -399,13 +397,14 @@ typedef SerdStatus (*SerdPrefixSink)(void* handle,
Called for every RDF statement in the serialisation.
*/
-typedef SerdStatus (*SerdStatementSink)(void* handle,
- const SerdNode* graph,
- const SerdNode* subject,
- const SerdNode* predicate,
- const SerdNode* object,
- const SerdNode* object_datatype,
- const SerdNode* object_lang);
+typedef SerdStatus (*SerdStatementSink)(void* handle,
+ SerdStatementFlags flags,
+ const SerdNode* graph,
+ const SerdNode* subject,
+ const SerdNode* predicate,
+ const SerdNode* object,
+ const SerdNode* object_datatype,
+ const SerdNode* object_lang);
/**
Sink (callback) for anonymous node end markers.
@@ -632,13 +631,14 @@ serd_writer_set_prefix(SerdWriter* writer,
*/
SERD_API
SerdStatus
-serd_writer_write_statement(SerdWriter* writer,
- const SerdNode* graph,
- const SerdNode* subject,
- const SerdNode* predicate,
- const SerdNode* object,
- const SerdNode* object_datatype,
- const SerdNode* object_lang);
+serd_writer_write_statement(SerdWriter* writer,
+ SerdStatementFlags flags,
+ const SerdNode* graph,
+ const SerdNode* subject,
+ const SerdNode* predicate,
+ const SerdNode* object,
+ const SerdNode* object_datatype,
+ const SerdNode* object_lang);
/**
Mark the end of an anonymous node's description.
diff --git a/src/reader.c b/src/reader.c
index 12109ad3..e76e40e1 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -49,9 +49,10 @@ typedef struct {
} Node;
typedef struct {
- const Node* graph;
- const Node* subject;
- const Node* predicate;
+ const Node* graph;
+ const Node* subject;
+ const Node* predicate;
+ SerdStatementFlags* flags;
} ReadContext;
/** Measured UTF-8 string. */
@@ -305,7 +306,7 @@ public_node(SerdReader* reader, const Node* private)
}
static inline bool
-emit_statement(SerdReader* reader,
+emit_statement(SerdReader* reader, SerdStatementFlags* flags,
const Node* g, const Node* s, const Node* p, const Node* o,
const Node* d, Ref l, uint32_t f)
{
@@ -318,17 +319,20 @@ emit_statement(SerdReader* reader,
const SerdNode datatype = public_node(reader, d);
const SerdNode lang = public_node_from_ref(reader, SERD_LITERAL, l);
object.flags = f;
- return !reader->statement_sink(reader->handle,
- &graph,
- &subject,
- &predicate,
- &object,
- &datatype,
- &lang);
+ bool ret = !reader->statement_sink(reader->handle,
+ *flags,
+ &graph,
+ &subject,
+ &predicate,
+ &object,
+ &datatype,
+ &lang);
+ *flags = (*flags & SERD_ANON_CONT) ? SERD_ANON_CONT : 0;
+ return ret;
}
static bool read_collection(SerdReader* reader, ReadContext ctx, Node* dest);
-static bool read_predicateObjectList(SerdReader* reader, ReadContext ctx);
+static bool read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool blank);
// [40] hex ::= [#x30-#x39] | [#x41-#x46]
static inline uint8_t
@@ -1022,45 +1026,53 @@ blank_id(SerdReader* reader)
// Impl: [21] blank ::= nodeID | '[ ws* ]'
// | '[' ws* predicateObjectList ws* ']' | collection
static bool
-read_blank(SerdReader* reader, ReadContext ctx, Node* dest)
+read_blank(SerdReader* reader, ReadContext ctx, bool subject, Node* dest)
{
+ const bool was_anon_subject = (*ctx.flags | SERD_ANON_CONT);
switch (peek_byte(reader)) {
case '_':
- *dest = make_node(SERD_BLANK_ID, read_nodeID(reader));
+ *dest = make_node(SERD_BLANK, read_nodeID(reader));
return true;
case '[':
eat_byte(reader, '[');
read_ws_star(reader);
+ *dest = make_node(SERD_BLANK, blank_id(reader));
if (peek_byte(reader) == ']') {
eat_byte(reader, ']');
- *dest = make_node(SERD_BLANK_ID, blank_id(reader));
+ *ctx.flags |= (subject) ? SERD_EMPTY_S : SERD_EMPTY_O;
if (ctx.subject) {
- TRY_RET(emit_statement(reader,
+ TRY_RET(emit_statement(reader, ctx.flags,
ctx.graph, ctx.subject, ctx.predicate,
dest, NULL, 0, 0));
}
return true;
}
- *dest = make_node(SERD_ANON_BEGIN, blank_id(reader));
+
+ *ctx.flags |= (subject) ? SERD_ANON_S_BEGIN : SERD_ANON_O_BEGIN;
if (ctx.subject) {
- TRY_RET(emit_statement(reader,
+ TRY_RET(emit_statement(reader, ctx.flags,
ctx.graph, ctx.subject, ctx.predicate,
dest, NULL, 0, 0));
- dest->type = SERD_ANON;
}
ctx.subject = dest;
- read_predicateObjectList(reader, ctx);
+ if (!subject) {
+ *ctx.flags |= SERD_ANON_CONT;
+ }
+ read_predicateObjectList(reader, ctx, true);
read_ws_star(reader);
eat_byte(reader, ']');
if (reader->end_sink) {
const SerdNode end = public_node(reader, dest);
reader->end_sink(reader->handle, &end);
}
+ if (subject && !was_anon_subject) {
+ *ctx.flags &= ~SERD_ANON_CONT;
+ }
return true;
case '(':
if (read_collection(reader, ctx, dest)) {
if (ctx.subject) {
- TRY_RET(emit_statement(reader,
+ TRY_RET(emit_statement(reader, ctx.flags,
ctx.graph, ctx.subject, ctx.predicate,
dest, NULL, 0, 0));
}
@@ -1113,17 +1125,14 @@ read_object(SerdReader* reader, ReadContext ctx)
emit = false;
// fall through
case '_':
- TRY_THROW(ret = read_blank(reader, ctx, &o));
+ TRY_THROW(ret = read_blank(reader, ctx, false, &o));
break;
case '<': case ':':
TRY_THROW(ret = read_resource(reader, &o));
break;
case '\"': case '+': case '-':
case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang, &flags));
- break;
- case '.':
+ case '5': case '6': case '7': case '8': case '9': case '.':
TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang, &flags));
break;
default:
@@ -1150,7 +1159,7 @@ read_object(SerdReader* reader, ReadContext ctx)
if (ret && emit) {
assert(o.value);
- ret = emit_statement(reader,
+ ret = emit_statement(reader, ctx.flags,
ctx.graph, ctx.subject, ctx.predicate,
&o, &datatype, lang, flags);
}
@@ -1168,7 +1177,7 @@ except:
// Spec: [8] objectList ::= object ( ',' object )*
// Impl: [8] objectList ::= object ( ws* ',' ws* object )*
static bool
-read_objectList(SerdReader* reader, ReadContext ctx)
+read_objectList(SerdReader* reader, ReadContext ctx, bool blank)
{
TRY_RET(read_object(reader, ctx));
read_ws_star(reader);
@@ -1186,7 +1195,7 @@ read_objectList(SerdReader* reader, ReadContext ctx)
// Impl: [7] predicateObjectList ::= verb ws+ objectList
// (ws* ';' ws* verb ws+ objectList)* (';')?
static bool
-read_predicateObjectList(SerdReader* reader, ReadContext ctx)
+read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool blank)
{
if (reader->eof) {
return false;
@@ -1195,7 +1204,7 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx)
TRY_RET(read_verb(reader, &predicate));
TRY_THROW(read_ws_plus(reader));
ctx.predicate = &predicate;
- TRY_THROW(read_objectList(reader, ctx));
+ TRY_THROW(read_objectList(reader, ctx, blank));
pop_string(reader, predicate.value);
predicate.value = 0;
read_ws_star(reader);
@@ -1209,7 +1218,7 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx)
TRY_THROW(read_verb(reader, &predicate));
ctx.predicate = &predicate;
TRY_THROW(read_ws_plus(reader));
- TRY_THROW(read_objectList(reader, ctx));
+ TRY_THROW(read_objectList(reader, ctx, blank));
pop_string(reader, predicate.value);
predicate.value = 0;
read_ws_star(reader);
@@ -1229,18 +1238,20 @@ read_collection_rec(SerdReader* reader, ReadContext ctx)
read_ws_star(reader);
if (peek_byte(reader) == ')') {
eat_byte(reader, ')');
- TRY_RET(emit_statement(reader, NULL,
+ TRY_RET(emit_statement(reader, ctx.flags,
+ NULL,
ctx.subject,
&reader->rdf_rest,
&reader->rdf_nil, NULL, 0, 0));
return false;
} else {
- const Node rest = make_node(SERD_BLANK_ID, blank_id(reader));
- TRY_RET(emit_statement(reader, ctx.graph,
+ const Node rest = make_node(SERD_BLANK, blank_id(reader));
+ TRY_RET(emit_statement(reader, ctx.flags,
+ ctx.graph,
ctx.subject,
&reader->rdf_rest,
&rest, NULL, 0, 0));
- ctx.subject = &rest;
+ ctx.subject = &rest;
ctx.predicate = &reader->rdf_first;
if (read_object(reader, ctx)) {
read_collection_rec(reader, ctx);
@@ -1266,7 +1277,7 @@ read_collection(SerdReader* reader, ReadContext ctx, Node* dest)
return true;
}
- *dest = make_node(SERD_BLANK_ID, blank_id(reader));
+ *dest = make_node(SERD_BLANK, blank_id(reader));
ctx.subject = dest;
ctx.predicate = &reader->rdf_first;
if (!read_object(reader, ctx)) {
@@ -1284,7 +1295,7 @@ read_subject(SerdReader* reader, ReadContext ctx)
Node subject = INTERNAL_NODE_NULL;
switch (peek_byte(reader)) {
case '[': case '(': case '_':
- read_blank(reader, ctx, &subject);
+ read_blank(reader, ctx, true, &subject);
break;
default:
read_resource(reader, &subject);
@@ -1302,7 +1313,7 @@ read_triples(SerdReader* reader, ReadContext ctx)
if (subject.value != 0) {
ctx.subject = &subject;
TRY_RET(read_ws_plus(reader));
- ret = read_predicateObjectList(reader, ctx);
+ ret = read_predicateObjectList(reader, ctx, false);
pop_string(reader, subject.value);
}
ctx.subject = ctx.predicate = 0;
@@ -1370,7 +1381,8 @@ read_directive(SerdReader* reader)
static bool
read_statement(SerdReader* reader)
{
- ReadContext ctx = { 0, 0, 0 };
+ SerdStatementFlags flags = 0;
+ ReadContext ctx = { 0, 0, 0, &flags };
read_ws_star(reader);
if (reader->eof) {
return true;
diff --git a/src/writer.c b/src/writer.c
index fafde07c..550337a7 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -184,46 +184,47 @@ reset_context(SerdWriter* writer)
writer->empty = false;
}
+typedef enum {
+ FIELD_NONE,
+ FIELD_SUBJECT,
+ FIELD_PREDICATE,
+ FIELD_OBJECT
+} Field;
+
static bool
-write_node(SerdWriter* writer,
- const SerdNode* node,
- const SerdNode* datatype,
- const SerdNode* lang,
- bool is_predicate)
+write_node(SerdWriter* writer,
+ const SerdNode* node,
+ const SerdNode* datatype,
+ const SerdNode* lang,
+ Field field,
+ SerdStatementFlags flags)
{
SerdChunk uri_prefix;
SerdChunk uri_suffix;
switch (node->type) {
case SERD_NOTHING:
return false;
- case SERD_ANON_BEGIN:
- if (writer->syntax != SERD_NTRIPLES) {
+ case SERD_BLANK:
+ if (writer->syntax != SERD_NTRIPLES
+ && ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN))
+ || (field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN)))) {
++writer->indent;
serd_writer_write_delim(writer, '[');
- WriteContext* ctx = (WriteContext*)serd_stack_push(
- &writer->anon_stack, sizeof(WriteContext));
- ctx->graph = serd_node_copy(&writer->context.graph);
- ctx->subject = serd_node_copy(&writer->context.subject);
- ctx->predicate = serd_node_copy(&writer->context.predicate);
- ctx->object = serd_node_copy(&writer->context.object);
- reset_context(writer);
- writer->context.subject = serd_node_copy(node);
- break;
- }
- case SERD_ANON:
- if (writer->syntax != SERD_NTRIPLES) {
- break;
- } // else fall through
- case SERD_BLANK_ID:
- writer->sink("_:", 2, writer->stream);
- if (writer->bprefix
- && !strncmp((const char*)node->buf, (const char*)writer->bprefix,
- writer->bprefix_len)) {
- writer->sink(node->buf + writer->bprefix_len,
- node->n_bytes - writer->bprefix_len,
- writer->stream);
+ } else if (writer->syntax != SERD_NTRIPLES
+ && ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S))
+ || (field == FIELD_OBJECT && (flags & SERD_EMPTY_O)))) {
+ writer->sink("[]", 2, writer->stream);
} else {
- writer->sink(node->buf, node->n_bytes, writer->stream);
+ writer->sink("_:", 2, writer->stream);
+ if (writer->bprefix
+ && !strncmp((const char*)node->buf, (const char*)writer->bprefix,
+ writer->bprefix_len)) {
+ writer->sink(node->buf + writer->bprefix_len,
+ node->n_bytes - writer->bprefix_len,
+ writer->stream);
+ } else {
+ writer->sink(node->buf, node->n_bytes, writer->stream);
+ }
}
break;
case SERD_CURIE:
@@ -268,7 +269,7 @@ write_node(SerdWriter* writer,
writer->sink(lang->buf, lang->n_bytes, writer->stream);
} else if (datatype && datatype->buf) {
writer->sink("^^", 2, writer->stream);
- write_node(writer, datatype, NULL, NULL, false);
+ write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags);
}
break;
case SERD_URI:
@@ -308,22 +309,24 @@ write_node(SerdWriter* writer,
SERD_API
SerdStatus
-serd_writer_write_statement(SerdWriter* writer,
- const SerdNode* graph,
- const SerdNode* subject,
- const SerdNode* predicate,
- const SerdNode* object,
- const SerdNode* object_datatype,
- const SerdNode* object_lang)
+serd_writer_write_statement(SerdWriter* writer,
+ SerdStatementFlags flags,
+ const SerdNode* graph,
+ const SerdNode* subject,
+ const SerdNode* predicate,
+ const SerdNode* object,
+ const SerdNode* object_datatype,
+ const SerdNode* object_lang)
{
assert(subject && predicate && object);
switch (writer->syntax) {
case SERD_NTRIPLES:
- write_node(writer, subject, NULL, NULL, false);
+ write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags);
writer->sink(" ", 1, writer->stream);
- write_node(writer, predicate, NULL, NULL, true);
+ write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags);
writer->sink(" ", 1, writer->stream);
- if (!write_node(writer, object, object_datatype, object_lang, false)) {
+ if (!write_node(writer, object, object_datatype, object_lang,
+ FIELD_OBJECT, flags)) {
return SERD_ERR_UNKNOWN;
}
writer->sink(" .\n", 3, writer->stream);
@@ -334,24 +337,19 @@ serd_writer_write_statement(SerdWriter* writer,
if (serd_node_equals(subject, &writer->context.subject)) {
if (serd_node_equals(predicate, &writer->context.predicate)) {
// Abbreviate S P
- if (writer->context.object.type == SERD_ANON_BEGIN) {
- writer->sink(" , ", 3, writer->stream);
- write_node(writer, object, object_datatype, object_lang, false);
- } else {
- ++writer->indent;
- serd_writer_write_delim(writer, ',');
- write_node(writer, object, object_datatype, object_lang, false);
- --writer->indent;
- }
+ ++writer->indent;
+ serd_writer_write_delim(writer, ',');
+ write_node(writer, object, object_datatype, object_lang,
+ FIELD_OBJECT, flags);
+ --writer->indent;
} else {
// Abbreviate S
if (writer->context.predicate.buf) {
serd_writer_write_delim(writer, ';');
} else {
- ++writer->indent;
serd_writer_write_delim(writer, '\n');
}
- write_node(writer, predicate, NULL, NULL, true);
+ write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags);
if (writer->context.predicate.buf)
serd_node_free(&writer->context.predicate);
writer->context.predicate = serd_node_copy(predicate);
@@ -359,7 +357,8 @@ serd_writer_write_statement(SerdWriter* writer,
serd_node_free(&writer->context.object);
writer->context.object = serd_node_copy(object);
writer->sink(" ", 1, writer->stream);
- write_node(writer, object, object_datatype, object_lang, false);
+ write_node(writer, object, object_datatype, object_lang,
+ FIELD_OBJECT, flags);
}
} else {
if (writer->context.subject.buf) {
@@ -374,29 +373,33 @@ serd_writer_write_statement(SerdWriter* writer,
serd_writer_write_delim(writer, '\n');
}
- if (subject->type == SERD_ANON_BEGIN) {
- writer->sink("[ ", 2, writer->stream);
- ++writer->indent;
- WriteContext* ctx = (WriteContext*)serd_stack_push(
- &writer->anon_stack, sizeof(WriteContext));
- *ctx = writer->context;
- } else {
- write_node(writer, subject, NULL, NULL, false);
+ if (!(flags & SERD_ANON_CONT)) {
+ write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags);
++writer->indent;
- if (subject->type != SERD_ANON_BEGIN && subject->type != SERD_ANON) {
+ if (!(flags & SERD_ANON_S_BEGIN)) {
serd_writer_write_delim(writer, '\n');
}
+ } else {
+ ++writer->indent;
}
reset_context(writer);
writer->context.subject = serd_node_copy(subject);
writer->context.predicate = SERD_NODE_NULL;
- write_node(writer, predicate, NULL, NULL, true);
+ write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags);
writer->context.predicate = serd_node_copy(predicate);
writer->sink(" ", 1, writer->stream);
- write_node(writer, object, object_datatype, object_lang, false);
+ write_node(writer, object, object_datatype, object_lang,
+ FIELD_OBJECT, flags);
+ }
+
+ if (writer->syntax != SERD_NTRIPLES
+ && ((flags & SERD_ANON_S_BEGIN) || (flags & SERD_ANON_O_BEGIN))) {
+ WriteContext* ctx = (WriteContext*)serd_stack_push(
+ &writer->anon_stack, sizeof(WriteContext));
+ *ctx = writer->context;
}
const WriteContext new_context = { serd_node_copy(graph),
@@ -424,11 +427,13 @@ serd_writer_end_anon(SerdWriter* writer,
--writer->indent;
serd_writer_write_delim(writer, '\n');
writer->sink("]", 1, writer->stream);
+ const bool is_subject = serd_node_equals(node, &writer->context.subject);
reset_context(writer);
writer->context = *anon_stack_top(writer);
serd_stack_pop(&writer->anon_stack, sizeof(WriteContext));
- if (!writer->context.subject.buf) { // End of anonymous subject
- writer->context.subject = serd_node_copy(node);
+ if (is_subject) {
+ writer->context.subject = serd_node_copy(node);
+ writer->context.predicate = SERD_NODE_NULL;
}
return SERD_SUCCESS;
}