aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David Robillard <d@drobilla.net> 2011-01-23 07:44:29 +0000
committer: David Robillard <d@drobilla.net> 2011-01-23 07:44:29 +0000
commit: 983c964a11919f68f62d0a2193204789c27dc99c (patch)
tree: e15fb23b4ec7776f81504082a23f8c1d1896f9d5
parent: d7ebb11e67fe6456d89bac6d173c271398fa6d0e (diff)
download: serd-983c964a11919f68f62d0a2193204789c27dc99c.tar.gz
serd-983c964a11919f68f62d0a2193204789c27dc99c.tar.bz2
serd-983c964a11919f68f62d0a2193204789c27dc99c.zip
Streaming abbreviation.
git-svn-id: http://svn.drobilla.net/serd/trunk@41 490d8e77-9747-427b-9fa3-0b8f29cee8a0
-rw-r--r-- serd/serd.h 78
-rw-r--r-- src/reader.c 129
-rw-r--r-- src/serd_stack.h 79
-rw-r--r-- src/serdi.c 10
-rw-r--r-- src/writer.c 220
5 files changed, 363 insertions, 153 deletions
diff --git a/serd/serd.h b/serd/serd.h
index 295baee0..8f8dfdf3 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -8,11 +8,11 @@
*
* Serd is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for details.
*
* You should have received a copy of the GNU Lesser General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* @file
@@ -59,16 +59,54 @@ typedef struct SerdWriterImpl* SerdWriter; /**< RDF writer. */
/** RDF syntax */
typedef enum {
- SERD_TURTLE = 1,
- SERD_NTRIPLES = 2
+ SERD_TURTLE = 1, /**< <http://www.w3.org/TeamSubmission/turtle/> */
+ SERD_NTRIPLES = 2 /**< <http://www.w3.org/TR/rdf-testcases/#ntriples> */
} SerdSyntax;
-/** Type of RDF node. */
+/** Type of a syntactic RDF node.
+ * This is more precise than the type of an abstract RDF node. An abstract
+ * node is either a resource, literal, or blank. In syntax there are two
+ * ways to refer to a resource (URI or CURIE), and two ways to refer to a
+ * blank node (with a blank ID, or anonymously). Serd represents nodes as
+ * an unquoted UTF-8 string "value" associated with a @ref SerdNodeType,
+ * which preserves syntactic information allowing for lossless abbreviation.
+ * A non-abbreviating sink may simply consider @ref SERD_ANON_BEGIN and
+ * @ref SERD_ANON equivalent to SERD_BLANK_ID.
+ */
typedef enum {
- BLANK = 1, ///< Blank node (resource with no URI)
- URI = 2, ///< URI (universal identifier)
- QNAME = 3, ///< CURIE/QName (URI shortened with a namespace)
- LITERAL = 4 ///< Literal string (with optional lang or datatype)
+
+ /** Literal value. A literal optionally has either an associated language,
+ * or an associated datatype (not both).
+ */
+ SERD_LITERAL = 1,
+
+ /** URI. Value is a valid URI string (either absolute or relative), which
+ * is valid universally. See <http://tools.ietf.org/html/rfc3986>.
+ */
+ SERD_URI = 2,
+
+ /** CURIE, a shortened URI. Value is an unquoted UTF-8 CURIE string
+ * relative to the current environment, e.g. "rdf:type", which is valid
+ * only within this serialisation. See <http://www.w3.org/TR/curie>.
+ */
+ SERD_CURIE = 3,
+
+ /** A blank node ID. Value is a blank node identifier (e.g. "blank3"),
+ * which is valid only within this serialisation.
+ * See <http://www.w3.org/TeamSubmission/turtle#nodeID>.
+ */
+ SERD_BLANK_ID = 4,
+
+ /** The first reference to an anonymous (inlined) blank node.
+ * Value is identical to a @ref SERD_BLANK_ID value.
+ */
+ SERD_ANON_BEGIN = 5,
+
+ /** An anonymous blank node.
+ * Value is identical to a @ref SERD_BLANK_ID value.
+ */
+ SERD_ANON = 6
+
} SerdNodeType;
/** @name SerdURI
@@ -84,7 +122,7 @@ typedef struct {
/** A parsed URI.
* This struct directly refers to chunks in other strings, it does not own
- * any memory itself. Thus, URIs can be parsed and/or resolved against a
+ * any memory itself. Thus, URIs can be parsed and/or resolved against a
* base URI in-place without allocating memory.
*/
typedef struct {
@@ -218,6 +256,14 @@ typedef bool (*SerdStatementSink)(void* handle,
const SerdString* object_lang,
const SerdString* object_datatype);
+/** Sink for anonymous node end markers.
+ * This is called to indicate that the anonymous node with the given
+ * @a value will no longer be referred to by any future statements
+ * (i.e. the anonymous serialisation of the node is finished).
+ * @param handle The reader's handle, passed through unchanged.
+ * @param value Value (blank node ID) of the anonymous node that has ended.
+ * NOTE(review): the reader's read_blank discards the returned bool;
+ * confirm what a false return is meant to signal.
+ */
+typedef bool (*SerdEndSink)(void* handle,
+ const SerdString* value);
+
/** Create a new RDF reader. */
SERD_API
SerdReader
@@ -225,7 +271,8 @@ serd_reader_new(SerdSyntax syntax,
void* handle,
SerdBaseSink base_sink,
SerdPrefixSink prefix_sink,
- SerdStatementSink statement_sink);
+ SerdStatementSink statement_sink,
+ SerdEndSink end_sink);
/** Read @a file. */
SERD_API
@@ -247,8 +294,7 @@ serd_reader_free(SerdReader reader);
typedef enum {
SERD_STYLE_ABBREVIATED = 1,
- SERD_STYLE_ASCII = 1 << 1,
- SERD_STYLE_ESCAPE_
+ SERD_STYLE_ASCII = 1 << 1
} SerdStyle;
/** Create a new RDF writer. */
@@ -293,6 +339,12 @@ serd_writer_write_statement(SerdWriter writer,
const SerdString* object_datatype,
const SerdString* object_lang);
+/** Mark the end of an anonymous node's description.
+ * For any syntax other than SERD_NTRIPLES this writes the closing "]" and
+ * restores the context that was current before the node was opened.
+ * @param writer Writer to operate on.
+ * @param subject Value of the anonymous node being closed.
+ * @return true on success (always for SERD_NTRIPLES, where nothing is
+ * written), false if no anonymous node is currently open.
+ */
+SERD_API
+bool
+serd_writer_end_anon(SerdWriter writer,
+ const SerdString* subject);
+
/** Finish a write. */
SERD_API
void
diff --git a/src/reader.c b/src/reader.c
index ea941770..057775c8 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -24,12 +24,13 @@
#include <string.h>
#include "serd/serd.h"
+#include "serd_stack.h"
#define TRY_THROW(exp) if (!(exp)) goto except;
#define TRY_RET(exp) if (!(exp)) return 0;
-#define READ_BUF_LEN 4096
-#define STACK_CHUNK_SIZE 4096
+#define STACK_PAGE_SIZE 4096
+#define READ_BUF_LEN 4096
#ifndef NDEBUG
#define STACK_DEBUG 1
#endif
@@ -40,12 +41,6 @@ typedef struct {
unsigned col;
} Cursor;
-typedef struct {
- uint8_t* buf; ///< Stack memory
- size_t buf_size; ///< Allocated size of buf (>= size)
- size_t size; ///< Conceptual size of stack in buf
-} Stack;
-
typedef uint32_t uchar;
typedef size_t Ref;
@@ -61,7 +56,7 @@ typedef struct {
const Node* graph;
const Node* subject;
const Node* predicate;
-} Context;
+} ReadContext;
static const Node SERD_NODE_NULL = {0,0,0,0};
@@ -70,12 +65,13 @@ struct SerdReaderImpl {
SerdBaseSink base_sink;
SerdPrefixSink prefix_sink;
SerdStatementSink statement_sink;
+ SerdEndSink end_sink;
Node rdf_type;
Node rdf_first;
Node rdf_rest;
Node rdf_nil;
FILE* fd;
- Stack stack;
+ SerdStack stack;
Cursor cur;
uint8_t* buf;
unsigned next_id;
@@ -201,19 +197,6 @@ stack_is_top_string(SerdReader reader, Ref ref)
}
#endif
-static inline uint8_t*
-stack_push(SerdReader reader, size_t n_bytes)
-{
- const size_t new_size = reader->stack.size + n_bytes;
- if (reader->stack.buf_size < new_size) {
- reader->stack.buf_size = ((new_size / STACK_CHUNK_SIZE) + 1) * STACK_CHUNK_SIZE;
- reader->stack.buf = realloc(reader->stack.buf, reader->stack.buf_size);
- }
- uint8_t* const ret = (reader->stack.buf + reader->stack.size);
- reader->stack.size = new_size;
- return ret;
-}
-
static inline intptr_t
pad_size(intptr_t size)
{
@@ -228,7 +211,7 @@ push_string(SerdReader reader, const char* c_str, size_t n_bytes)
const size_t stack_size = pad_size((intptr_t)reader->stack.size);
const size_t pad = stack_size - reader->stack.size;
SerdString* const str = (SerdString*)(
- stack_push(reader, pad + sizeof(SerdString) + n_bytes) + pad);
+ serd_stack_push(&reader->stack, pad + sizeof(SerdString) + n_bytes) + pad);
str->n_bytes = n_bytes;
str->n_chars = n_bytes - 1;
memcpy(str->buf, c_str, n_bytes);
@@ -254,7 +237,7 @@ push_byte(SerdReader reader, Ref ref, const uint8_t c)
#ifdef STACK_DEBUG
assert(stack_is_top_string(reader, ref));
#endif
- stack_push(reader, 1);
+ serd_stack_push(&reader->stack, 1);
SerdString* const str = deref(reader, ref);
++str->n_bytes;
if ((c & 0xC0) != 0x80) {
@@ -284,7 +267,7 @@ pop_string(SerdReader reader, Ref ref)
assert(stack_is_top_string(reader, ref));
--reader->n_allocs;
#endif
- reader->stack.size -= deref(reader, ref)->n_bytes;
+ serd_stack_pop(&reader->stack, deref(reader, ref)->n_bytes);
}
}
@@ -292,6 +275,7 @@ static inline void
emit_statement(SerdReader reader,
const Node* g, const Node* s, const Node* p, const Node* o)
{
+ assert(s->value && p->value && o->value);
reader->statement_sink(reader->handle,
g ? deref(reader, g->value) : NULL,
deref(reader, s->value), s->type,
@@ -300,8 +284,8 @@ emit_statement(SerdReader reader,
deref(reader, o->datatype), deref(reader, o->lang));
}
-static bool read_collection(SerdReader reader, Context ctx, Node* dest);
-static bool read_predicateObjectList(SerdReader reader, Context ctx);
+static bool read_collection(SerdReader reader, ReadContext ctx, Node* dest);
+static bool read_predicateObjectList(SerdReader reader, ReadContext ctx);
// [40] hex ::= [#x30-#x39] | [#x41-#x46]
static inline uint8_t
@@ -882,7 +866,7 @@ read_number(SerdReader reader, Node* dest)
} else {
datatype = push_string(reader, XSD_INTEGER, strlen(XSD_INTEGER) + 1);
}
- *dest = make_node(LITERAL, str, datatype, 0);
+ *dest = make_node(SERD_LITERAL, str, datatype, 0);
assert(dest->value);
return true;
except:
@@ -897,10 +881,10 @@ read_resource(SerdReader reader, Node* dest)
{
switch (peek_byte(reader)) {
case '<':
- *dest = make_node(URI, read_uriref(reader), 0, 0);
+ *dest = make_node(SERD_URI, read_uriref(reader), 0, 0);
break;
default:
- *dest = make_node(QNAME, read_qname(reader), 0, 0);
+ *dest = make_node(SERD_CURIE, read_qname(reader), 0, 0);
}
return (dest->value != 0);
}
@@ -932,9 +916,9 @@ read_literal(SerdReader reader, Node* dest)
eat_byte(reader, '@');
TRY_THROW(lang = read_language(reader));
}
- *dest = make_node(LITERAL, str, datatype.value, lang);
+ *dest = make_node(SERD_LITERAL, str, datatype.value, lang);
} else {
- *dest = make_node(QNAME, read_qname(reader), 0, 0);
+ *dest = make_node(SERD_CURIE, read_qname(reader), 0, 0);
}
return true;
except:
@@ -961,7 +945,7 @@ read_verb(SerdReader reader, Node* dest)
switch (pre[1]) {
case 0x9: case 0xA: case 0xD: case 0x20:
eat_byte(reader, 'a');
- *dest = make_node(URI, push_string(reader, RDF_TYPE, 48), 0, 0);
+ *dest = make_node(SERD_URI, push_string(reader, RDF_TYPE, 48), 0, 0);
return true;
default: break; // fall through
}
@@ -991,30 +975,34 @@ blank_id(SerdReader reader)
// Spec: [21] blank ::= nodeID | '[]' | '[' predicateObjectList ']' | collection
// Actual: [21] blank ::= nodeID | '[ ws* ]' | '[' ws* predicateObjectList ws* ']' | collection
static bool
-read_blank(SerdReader reader, Context ctx, Node* dest)
+read_blank(SerdReader reader, ReadContext ctx, Node* dest)
{
switch (peek_byte(reader)) {
case '_':
- *dest = make_node(BLANK, read_nodeID(reader), 0, 0);
+ *dest = make_node(SERD_BLANK_ID, read_nodeID(reader), 0, 0);
return true;
case '[':
eat_byte(reader, '[');
read_ws_star(reader);
if (peek_byte(reader) == ']') {
eat_byte(reader, ']');
- *dest = make_node(BLANK, blank_id(reader), 0, 0);
+ *dest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0);
if (ctx.subject) {
emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest);
}
- } else {
- *dest = make_node(BLANK, blank_id(reader), 0, 0);
- if (ctx.subject) {
- emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest);
- }
- ctx.subject = dest;
- read_predicateObjectList(reader, ctx);
- read_ws_star(reader);
- eat_byte(reader, ']');
+ return true;
+ }
+ *dest = make_node(SERD_ANON_BEGIN, blank_id(reader), 0, 0);
+ if (ctx.subject) {
+ emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest);
+ dest->type = SERD_ANON;
+ }
+ ctx.subject = dest;
+ read_predicateObjectList(reader, ctx);
+ read_ws_star(reader);
+ eat_byte(reader, ']');
+ if (reader->end_sink) {
+ reader->end_sink(reader->handle, deref(reader, dest->value));
}
return true;
case '(':
@@ -1046,14 +1034,14 @@ is_object_end(const uint8_t c)
// Recurses, calling statement_sink for every statement encountered.
// Leaves stack in original calling state (i.e. pops everything it pushes).
static bool
-read_object(SerdReader reader, Context ctx)
+read_object(SerdReader reader, ReadContext ctx)
{
static const char* const XSD_BOOLEAN = "http://www.w3.org/2001/XMLSchema#boolean";
static const size_t XSD_BOOLEAN_LEN = 40;
uint8_t pre[6];
bool ret = false;
- bool emit = true;
+ bool emit = (ctx.subject != 0);
Node o = SERD_NODE_NULL;
const uint8_t c = peek_byte(reader);
switch (c) {
@@ -1086,14 +1074,14 @@ read_object(SerdReader reader, Context ctx)
eat_string(reader, "true", 4);
const Ref value = push_string(reader, "true", 5);
const Ref datatype = push_string(reader, XSD_BOOLEAN, XSD_BOOLEAN_LEN + 1);
- o = make_node(LITERAL, value, datatype, 0);
+ o = make_node(SERD_LITERAL, value, datatype, 0);
} else if (!memcmp(pre, "false", 5) && is_object_end(pre[5])) {
eat_string(reader, "false", 5);
const Ref value = push_string(reader, "false", 6);
const Ref datatype = push_string(reader, XSD_BOOLEAN, XSD_BOOLEAN_LEN + 1);
- o = make_node(LITERAL, value, datatype, 0);
+ o = make_node(SERD_LITERAL, value, datatype, 0);
} else if (!is_object_end(c)) {
- o = make_node(QNAME, read_qname(reader), 0, 0);
+ o = make_node(SERD_CURIE, read_qname(reader), 0, 0);
}
ret = o.value;
}
@@ -1113,7 +1101,7 @@ except:
// Spec: [8] objectList ::= object ( ',' object )*
// Actual: [8] objectList ::= object ( ws* ',' ws* object )*
static bool
-read_objectList(SerdReader reader, Context ctx)
+read_objectList(SerdReader reader, ReadContext ctx)
{
TRY_RET(read_object(reader, ctx));
read_ws_star(reader);
@@ -1129,7 +1117,7 @@ read_objectList(SerdReader reader, Context ctx)
// Spec: [7] predicateObjectList ::= verb objectList ( ';' verb objectList )* ( ';' )?
// Actual: [7] predicateObjectList ::= verb ws+ objectList ( ws* ';' ws* verb ws+ objectList )* ( ';' )?
static bool
-read_predicateObjectList(SerdReader reader, Context ctx)
+read_predicateObjectList(SerdReader reader, ReadContext ctx)
{
if (reader->eof) {
return false;
@@ -1166,7 +1154,7 @@ except:
/** Recursive helper for read_collection. */
static bool
-read_collection_rec(SerdReader reader, Context ctx)
+read_collection_rec(SerdReader reader, ReadContext ctx)
{
read_ws_star(reader);
if (peek_byte(reader) == ')') {
@@ -1174,7 +1162,7 @@ read_collection_rec(SerdReader reader, Context ctx)
emit_statement(reader, NULL, ctx.subject, &reader->rdf_rest, &reader->rdf_nil);
return false;
} else {
- const Node rest = make_node(BLANK, blank_id(reader), 0, 0);
+ const Node rest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0);
emit_statement(reader, ctx.graph, ctx.subject, &reader->rdf_rest, &rest);
ctx.subject = &rest;
ctx.predicate = &reader->rdf_first;
@@ -1192,7 +1180,7 @@ read_collection_rec(SerdReader reader, Context ctx)
// [22] itemList ::= object+
// [23] collection ::= '(' itemList? ')'
static bool
-read_collection(SerdReader reader, Context ctx, Node* dest)
+read_collection(SerdReader reader, ReadContext ctx, Node* dest)
{
TRY_RET(eat_byte(reader, '('));
read_ws_star(reader);
@@ -1202,7 +1190,7 @@ read_collection(SerdReader reader, Context ctx, Node* dest)
return true;
}
- *dest = make_node(BLANK, blank_id(reader), 0, 0);
+ *dest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0);
ctx.subject = dest;
ctx.predicate = &reader->rdf_first;
if (!read_object(reader, ctx)) {
@@ -1216,7 +1204,7 @@ read_collection(SerdReader reader, Context ctx, Node* dest)
// [11] subject ::= resource | blank
static Node
-read_subject(SerdReader reader, Context ctx)
+read_subject(SerdReader reader, ReadContext ctx)
{
Node subject = SERD_NODE_NULL;
switch (peek_byte(reader)) {
@@ -1232,17 +1220,18 @@ read_subject(SerdReader reader, Context ctx)
// Spec: [6] triples ::= subject predicateObjectList
// Actual: [6] triples ::= subject ws+ predicateObjectList
static bool
-read_triples(SerdReader reader, Context ctx)
+read_triples(SerdReader reader, ReadContext ctx)
{
const Node subject = read_subject(reader, ctx);
+ bool ret = false;
if (subject.value != 0) {
ctx.subject = &subject;
TRY_RET(read_ws_plus(reader));
- const bool ret = read_predicateObjectList(reader, ctx);
+ ret = read_predicateObjectList(reader, ctx);
pop_string(reader, subject.value);
- return ret;
}
- return false;
+ ctx.subject = ctx.predicate = 0;
+ return ret;
}
// [5] base ::= '@base' ws+ uriref
@@ -1305,7 +1294,7 @@ read_directive(SerdReader reader)
static bool
read_statement(SerdReader reader)
{
- Context ctx = { 0, 0, 0 };
+ ReadContext ctx = { 0, 0, 0 };
read_ws_star(reader);
if (reader->eof) {
return true;
@@ -1338,7 +1327,8 @@ serd_reader_new(SerdSyntax syntax,
void* handle,
SerdBaseSink base_sink,
SerdPrefixSink prefix_sink,
- SerdStatementSink statement_sink)
+ SerdStatementSink statement_sink,
+ SerdEndSink end_sink)
{
const Cursor cur = { NULL, 0, 0 };
SerdReader reader = malloc(sizeof(struct SerdReaderImpl));
@@ -1346,10 +1336,9 @@ serd_reader_new(SerdSyntax syntax,
reader->base_sink = base_sink;
reader->prefix_sink = prefix_sink;
reader->statement_sink = statement_sink;
+ reader->end_sink = end_sink;
reader->fd = 0;
- reader->stack.buf = malloc(STACK_CHUNK_SIZE);
- reader->stack.buf_size = STACK_CHUNK_SIZE;
- reader->stack.size = 8;
+ reader->stack = serd_stack_new(STACK_PAGE_SIZE);
reader->cur = cur;
reader->next_id = 1;
reader->err = 0;
@@ -1396,9 +1385,9 @@ serd_reader_read_file(SerdReader reader, FILE* file, const uint8_t* name)
me->fd = file;
me->cur = cur;
- me->rdf_first = make_node(URI, push_string(me, RDF_FIRST, 49), 0, 0);
- me->rdf_rest = make_node(URI, push_string(me, RDF_REST, 48), 0, 0);
- me->rdf_nil = make_node(URI, push_string(me, RDF_NIL, 47), 0, 0);
+ me->rdf_first = make_node(SERD_URI, push_string(me, RDF_FIRST, 49), 0, 0);
+ me->rdf_rest = make_node(SERD_URI, push_string(me, RDF_REST, 48), 0, 0);
+ me->rdf_nil = make_node(SERD_URI, push_string(me, RDF_NIL, 47), 0, 0);
fread(me->read_buf, 1, READ_BUF_LEN, file);
const bool ret = read_turtleDoc(me);
diff --git a/src/serd_stack.h b/src/serd_stack.h
new file mode 100644
index 00000000..95289e64
--- /dev/null
+++ b/src/serd_stack.h
@@ -0,0 +1,79 @@
+/* Serd, an RDF serialisation library.
+ * Copyright 2011 David Robillard <d@drobilla.net>
+ *
+ * Serd is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Serd is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef SERD_STACK_H
+#define SERD_STACK_H
+
+#include <assert.h>
+
+#include "serd/serd.h"
+
+/** An offset to start the stack at. Note 0 is reserved for NULL. */
+#define SERD_STACK_BOTTOM sizeof(void*)
+
+/** A growable byte stack kept in a single heap buffer.
+ * A freshly created stack has a size of SERD_STACK_BOTTOM, so a stack
+ * whose size is at most SERD_STACK_BOTTOM holds no data.
+ */
+typedef struct {
+ uint8_t* buf; ///< Stack memory
+ size_t buf_size; ///< Allocated size of buf (>= size)
+ size_t size; ///< Conceptual size of stack in buf
+} SerdStack;
+
+/** Create an empty stack with an initial capacity of @a size bytes.
+ * The stack's size starts at SERD_STACK_BOTTOM rather than 0.
+ * The result of the allocation is not checked here.
+ */
+static inline SerdStack
+serd_stack_new(size_t size)
+{
+	// Field order: buf, buf_size, size
+	const SerdStack fresh = { malloc(size), size, SERD_STACK_BOTTOM };
+	return fresh;
+}
+
+/** Return true iff nothing has been pushed above SERD_STACK_BOTTOM. */
+static inline bool
+serd_stack_is_empty(SerdStack* stack)
+{
+	const size_t used = stack->size;
+	return !(used > SERD_STACK_BOTTOM);
+}
+
+/** Release the stack's buffer and reset every field to zero/NULL. */
+static inline void
+serd_stack_free(SerdStack* stack)
+{
+	uint8_t* const memory = stack->buf;
+
+	// Field order: buf, buf_size, size
+	*stack = (SerdStack){ NULL, 0, 0 };
+	free(memory);
+}
+
+/** Reserve @a n_bytes at the top of the stack.
+ * The buffer is grown by repeated doubling until the request fits, so a
+ * single push larger than the current capacity is safe.
+ * @param stack Stack to push to.
+ * @param n_bytes Number of bytes to reserve.
+ * @return Pointer to the start of the reserved bytes, or NULL if the
+ * request overflows size_t or memory could not be allocated; in that
+ * case the stack is left unchanged. The pointer is only valid until
+ * the next push, which may move the buffer.
+ */
+static inline uint8_t*
+serd_stack_push(SerdStack* stack, size_t n_bytes)
+{
+	const size_t max_size = (size_t)-1;
+	const size_t new_size = stack->size + n_bytes;
+	if (new_size < stack->size) {
+		return NULL; // size_t wrapped around
+	}
+
+	if (stack->buf_size < new_size) {
+		// A capacity of 0 can never be doubled, so start from the request
+		size_t new_buf_size = stack->buf_size ? stack->buf_size : new_size;
+		while (new_buf_size < new_size) {
+			if (new_buf_size > max_size / 2) {
+				new_buf_size = new_size; // Doubling would overflow
+				break;
+			}
+			new_buf_size *= 2;
+		}
+
+		// Keep the old buffer if realloc fails rather than losing it
+		uint8_t* const new_buf = realloc(stack->buf, new_buf_size);
+		if (!new_buf) {
+			return NULL;
+		}
+		stack->buf = new_buf;
+		stack->buf_size = new_buf_size;
+	}
+
+	uint8_t* const ret = (stack->buf + stack->size);
+	stack->size = new_size;
+	return ret;
+}
+
+/** Discard the top @a n_bytes of the stack.
+ * The buffer is not shrunk; only the conceptual size is reduced.
+ */
+static inline void
+serd_stack_pop(SerdStack* stack, size_t n_bytes)
+{
+	const size_t current = stack->size;
+	assert(n_bytes <= current);
+	stack->size = current - n_bytes;
+}
+
+#endif // SERD_STACK_H
diff --git a/src/serdi.c b/src/serdi.c
index 73653792..2aa0a08e 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -115,6 +115,14 @@ event_statement(void* handle,
object, object_type, object_datatype, object_lang);
}
+/** Reader end-of-anonymous-node callback: forward the event to the writer. */
+static bool
+event_end(void* handle,
+          const SerdString* subject)
+{
+	// handle is the State registered with the reader
+	return serd_writer_end_anon(((State*)handle)->writer, subject);
+}
+
int
print_usage(const char* name, bool error)
{
@@ -218,7 +226,7 @@ main(int argc, char** argv)
};
SerdReader reader = serd_reader_new(
- SERD_TURTLE, &state, event_base, event_prefix, event_statement);
+ SERD_TURTLE, &state, event_base, event_prefix, event_statement, event_end);
const bool success = serd_reader_read_file(reader, in_fd, in_filename);
serd_reader_free(reader);
diff --git a/src/writer.c b/src/writer.c
index 1e50b7b0..3e91bcd2 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -20,6 +20,15 @@
#include <stdlib.h>
#include "serd/serd.h"
+#include "serd_stack.h"
+
+/** The graph, subject and predicate the writer is currently "inside".
+ * The abbreviating statement writer compares incoming statements against
+ * this to decide which delimiter to emit. A copy is pushed on the
+ * writer's anon_stack when an anonymous node is opened and restored by
+ * serd_writer_end_anon.
+ */
+typedef struct {
+ const SerdString* graph;
+ const SerdString* subject;
+ const SerdString* predicate;
+} WriteContext;
+
+/** Context with no graph, subject or predicate (nothing written yet). */
+static const WriteContext WRITE_CONTEXT_NULL = { 0, 0, 0 };
typedef bool (*StatementWriter)(SerdWriter writer,
const SerdString* graph,
@@ -43,13 +52,12 @@ struct SerdWriterImpl {
SerdStyle style;
SerdEnv env;
SerdURI base_uri;
+ SerdStack anon_stack;
SerdSink sink;
void* stream;
StatementWriter write_statement;
NodeWriter write_node;
- const SerdString* prev_g;
- const SerdString* prev_s;
- const SerdString* prev_p;
+ WriteContext context;
unsigned indent;
};
@@ -57,10 +65,18 @@ typedef enum {
WRITE_NORMAL,
WRITE_URI,
WRITE_STRING
-} WriteContext;
+} TextContext;
+
+/** Return the most recently pushed context on the anonymous node stack.
+ * The stack must not be empty.
+ */
+static inline WriteContext*
+anon_stack_top(SerdWriter writer)
+{
+	SerdStack* const stack = &writer->anon_stack;
+	assert(!serd_stack_is_empty(stack));
+
+	// The top entry occupies the last sizeof(WriteContext) bytes in use
+	uint8_t* const end = stack->buf + stack->size;
+	return (WriteContext*)(end - sizeof(WriteContext));
+}
static bool
-write_text(SerdWriter writer, WriteContext ctx,
+write_text(SerdWriter writer, TextContext ctx,
const uint8_t* utf8, size_t n_bytes, uint8_t terminator)
{
char escape[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
@@ -147,6 +163,24 @@ write_text(SerdWriter writer, WriteContext ctx,
return true;
}
+/** Write @a delim (if any), then a newline and the current indentation.
+ * A delimiter of 0 or '\n' writes nothing before the newline. '[' is
+ * written as-is; every other delimiter is preceded by a single space.
+ */
+static void
+serd_writer_write_delim(SerdWriter writer, const uint8_t delim)
+{
+	const bool has_delim = (delim != 0 && delim != '\n');
+	if (has_delim) {
+		if (delim != '[') {
+			writer->sink(" ", 1, writer->stream);
+		}
+		writer->sink(&delim, 1, writer->stream);
+	}
+
+	// Begin the next line at the current indentation level
+	writer->sink("\n", 1, writer->stream);
+	for (unsigned level = writer->indent; level > 0; --level) {
+		writer->sink("\t", 1, writer->stream);
+	}
+}
+
static bool
write_node(SerdWriter writer,
SerdNodeType type,
@@ -157,11 +191,26 @@ write_node(SerdWriter writer,
SerdChunk uri_prefix;
SerdChunk uri_suffix;
switch (type) {
- case BLANK:
+ case SERD_ANON_BEGIN:
+ if (writer->syntax != SERD_NTRIPLES) {
+ ++writer->indent;
+ serd_writer_write_delim(writer, '[');
+ WriteContext* ctx = (WriteContext*)serd_stack_push(
+ &writer->anon_stack, sizeof(WriteContext));
+ *ctx = writer->context;
+ writer->context.subject = str;
+ writer->context.predicate = 0;
+ break;
+ }
+ case SERD_ANON:
+ if (writer->syntax != SERD_NTRIPLES) {
+ break;
+ } // else fall through
+ case SERD_BLANK_ID:
writer->sink("_:", 2, writer->stream);
writer->sink(str->buf, str->n_bytes - 1, writer->stream);
break;
- case QNAME:
+ case SERD_CURIE:
switch (writer->syntax) {
case SERD_NTRIPLES:
if (!serd_env_expand(writer->env, str, &uri_prefix, &uri_suffix)) {
@@ -177,7 +226,19 @@ write_node(SerdWriter writer,
writer->sink(str->buf, str->n_bytes - 1, writer->stream);
}
break;
- case URI:
+ case SERD_LITERAL:
+ writer->sink("\"", 1, writer->stream);
+ write_text(writer, WRITE_STRING, str->buf, str->n_bytes - 1, '"');
+ writer->sink("\"", 1, writer->stream);
+ if (lang) {
+ writer->sink("@", 1, writer->stream);
+ writer->sink(lang->buf, lang->n_bytes - 1, writer->stream);
+ } else if (datatype) {
+ writer->sink("^^", 2, writer->stream);
+ write_node(writer, SERD_URI, datatype, NULL, NULL);
+ }
+ break;
+ case SERD_URI:
if (!serd_uri_string_has_scheme(str->buf)) {
SerdURI uri;
if (serd_uri_parse(str->buf, &uri)) {
@@ -196,39 +257,10 @@ write_node(SerdWriter writer,
return true;
}
return false;
- case LITERAL:
- writer->sink("\"", 1, writer->stream);
- write_text(writer, WRITE_STRING, str->buf, str->n_bytes - 1, '"');
- writer->sink("\"", 1, writer->stream);
- if (lang) {
- writer->sink("@", 1, writer->stream);
- writer->sink(lang->buf, lang->n_bytes - 1, writer->stream);
- } else if (datatype) {
- writer->sink("^^", 2, writer->stream);
- write_node(writer, URI, datatype, NULL, NULL);
- }
- break;
}
return true;
}
-static void
-serd_writer_write_delim(SerdWriter writer, const uint8_t delim)
-{
- switch (delim) {
- case 0:
- case '\n':
- break;
- default:
- writer->sink(" ", 1, writer->stream);
- writer->sink(&delim, 1, writer->stream);
- }
- writer->sink("\n", 1, writer->stream);
- for (unsigned i = 0; i < writer->indent; ++i) {
- writer->sink("\t", 1, writer->stream);
- }
-}
-
SERD_API
bool
serd_writer_write_statement(SerdWriter writer,
@@ -262,38 +294,63 @@ serd_writer_write_statement_abbrev(SerdWriter writer,
const SerdString* object_lang)
{
assert(subject && predicate && object);
- if (subject == writer->prev_s) {
- if (predicate == writer->prev_p) {
+ if (subject == writer->context.subject) {
+ if (predicate == writer->context.predicate) {
++writer->indent;
serd_writer_write_delim(writer, ',');
write_node(writer, object_type, object, object_datatype, object_lang);
--writer->indent;
} else {
- serd_writer_write_delim(writer, ';');
+ if (writer->context.predicate) {
+ serd_writer_write_delim(writer, ';');
+ } else {
+ ++writer->indent;
+ serd_writer_write_delim(writer, '\n');
+ }
write_node(writer, predicate_type, predicate, NULL, NULL);
+ writer->context.predicate = predicate;
writer->sink(" ", 1, writer->stream);
write_node(writer, object_type, object, object_datatype, object_lang);
}
} else {
- if (writer->prev_s) {
- --writer->indent;
- serd_writer_write_delim(writer, '.');
- serd_writer_write_delim(writer, '\n');
+ if (writer->context.subject) {
+ if (writer->indent > 0) {
+ --writer->indent;
+ }
+ if (serd_stack_is_empty(&writer->anon_stack)) {
+ serd_writer_write_delim(writer, '.');
+ serd_writer_write_delim(writer, '\n');
+ }
}
- write_node(writer, subject_type, subject, NULL, NULL);
- ++writer->indent;
- serd_writer_write_delim(writer, 0);
- writer->sink(" ", 1, writer->stream);
+ if (subject_type == SERD_ANON_BEGIN) {
+ writer->sink("[ ", 2, writer->stream);
+ ++writer->indent;
+ WriteContext* ctx = (WriteContext*)serd_stack_push(
+ &writer->anon_stack, sizeof(WriteContext));
+ *ctx = writer->context;
+ writer->context.subject = subject;
+ writer->context.predicate = 0;
+ } else {
+ write_node(writer, subject_type, subject, NULL, NULL);
+ ++writer->indent;
+ if (subject_type != SERD_ANON_BEGIN && subject_type != SERD_ANON) {
+ serd_writer_write_delim(writer, '\n');
+ }
+ }
+
+ writer->context.subject = subject;
+ writer->context.predicate = 0;
+
write_node(writer, predicate_type, predicate, NULL, NULL);
+ writer->context.predicate = predicate;
writer->sink(" ", 1, writer->stream);
write_node(writer, object_type, object, object_datatype, object_lang);
}
- writer->prev_g = graph;
- writer->prev_s = subject;
- writer->prev_p = predicate;
+ const WriteContext new_context = { graph, subject, predicate };
+ writer->context = new_context;
return true;
}
@@ -321,10 +378,36 @@ serd_writer_write_statement_flat(SerdWriter writer,
}
SERD_API
+bool
+serd_writer_end_anon(SerdWriter writer,
+                     const SerdString* subject)
+{
+	// Closes the innermost open anonymous node: writes "]" on a new line
+	// and restores the context saved when the node was opened.
+	// Returns false (after reporting on stderr) if no node is open.
+	if (writer->syntax == SERD_NTRIPLES) {
+		return true; // Nothing to do for N-Triples
+	}
+	if (serd_stack_is_empty(&writer->anon_stack)) {
+		fprintf(stderr, "unexpected SERD_END received\n");
+		return false;
+	}
+
+	// indent is unsigned: without this guard a stray end marker in an
+	// NDEBUG build would wrap it around and the delimiter below would
+	// emit billions of tabs. Same guard as in the abbreviating writer.
+	assert(writer->indent > 0);
+	if (writer->indent > 0) {
+		--writer->indent;
+	}
+	serd_writer_write_delim(writer, '\n');
+	writer->sink("]", 1, writer->stream);
+
+	// Restore the context that was current before this node was opened
+	writer->context = *anon_stack_top(writer);
+	serd_stack_pop(&writer->anon_stack, sizeof(WriteContext));
+	if (serd_stack_is_empty(&writer->anon_stack)) {
+		// End of anonymous subject, reset context
+		writer->context.subject = subject;
+		writer->context.predicate = 0;
+	}
+	return true;
+}
+
+SERD_API
void
serd_writer_finish(SerdWriter writer)
{
- if (writer->prev_s) {
+ if (writer->context.subject) {
writer->sink(" .\n", 3, writer->stream);
}
}
@@ -338,17 +421,17 @@ serd_writer_new(SerdSyntax syntax,
SerdSink sink,
void* stream)
{
- SerdWriter writer = malloc(sizeof(struct SerdWriterImpl));
- writer->syntax = syntax;
- writer->style = style;
- writer->env = env;
- writer->base_uri = *base_uri;
- writer->sink = sink;
- writer->stream = stream;
- writer->prev_g = 0;
- writer->prev_s = 0;
- writer->prev_p = 0;
- writer->indent = 0;
+ const WriteContext context = WRITE_CONTEXT_NULL;
+ SerdWriter writer = malloc(sizeof(struct SerdWriterImpl));
+ writer->syntax = syntax;
+ writer->style = style;
+ writer->env = env;
+ writer->base_uri = *base_uri;
+ writer->anon_stack = serd_stack_new(sizeof(WriteContext));
+ writer->sink = sink;
+ writer->stream = stream;
+ writer->context = context;
+ writer->indent = 0;
writer->write_node = write_node;
if ((style & SERD_STYLE_ABBREVIATED)) {
writer->write_statement = serd_writer_write_statement_abbrev;
@@ -365,9 +448,9 @@ serd_writer_set_base_uri(SerdWriter writer,
{
writer->base_uri = *uri;
if (writer->syntax != SERD_NTRIPLES) {
- if (writer->prev_g || writer->prev_s) {
+ if (writer->context.graph || writer->context.subject) {
writer->sink(" .\n\n", 4, writer->stream);
- writer->prev_g = writer->prev_s = writer->prev_p = 0;
+ writer->context = WRITE_CONTEXT_NULL;
}
writer->sink("@base ", 6, writer->stream);
writer->sink(" <", 2, writer->stream);
@@ -383,11 +466,9 @@ serd_writer_set_prefix(SerdWriter writer,
const SerdString* uri)
{
if (writer->syntax != SERD_NTRIPLES) {
- if (writer->prev_g || writer->prev_s) {
+ if (writer->context.graph || writer->context.subject) {
writer->sink(" .\n\n", 4, writer->stream);
- writer->prev_g = 0;
- writer->prev_s = 0;
- writer->prev_p = 0;
+ writer->context = WRITE_CONTEXT_NULL;
}
writer->sink("@prefix ", 8, writer->stream);
writer->sink(name->buf, name->n_bytes - 1, writer->stream);
@@ -402,5 +483,6 @@ void
serd_writer_free(SerdWriter writer)
{
SerdWriter const me = (SerdWriter)writer;
+ serd_stack_free(&writer->anon_stack);
free(me);
}