diff options
author | David Robillard <d@drobilla.net> | 2011-01-23 07:44:29 +0000 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2011-01-23 07:44:29 +0000 |
commit | 983c964a11919f68f62d0a2193204789c27dc99c (patch) | |
tree | e15fb23b4ec7776f81504082a23f8c1d1896f9d5 | |
parent | d7ebb11e67fe6456d89bac6d173c271398fa6d0e (diff) | |
download | serd-983c964a11919f68f62d0a2193204789c27dc99c.tar.gz serd-983c964a11919f68f62d0a2193204789c27dc99c.tar.bz2 serd-983c964a11919f68f62d0a2193204789c27dc99c.zip |
Streaming abbreviation.
git-svn-id: http://svn.drobilla.net/serd/trunk@41 490d8e77-9747-427b-9fa3-0b8f29cee8a0
-rw-r--r-- | serd/serd.h | 78 | ||||
-rw-r--r-- | src/reader.c | 129 | ||||
-rw-r--r-- | src/serd_stack.h | 79 | ||||
-rw-r--r-- | src/serdi.c | 10 | ||||
-rw-r--r-- | src/writer.c | 220 |
5 files changed, 363 insertions, 153 deletions
diff --git a/serd/serd.h b/serd/serd.h index 295baee0..8f8dfdf3 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -8,11 +8,11 @@ * * Serd is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for details. * * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* @file @@ -59,16 +59,54 @@ typedef struct SerdWriterImpl* SerdWriter; /**< RDF writer. */ /** RDF syntax */ typedef enum { - SERD_TURTLE = 1, - SERD_NTRIPLES = 2 + SERD_TURTLE = 1, /**< <http://www.w3.org/TeamSubmission/turtle/> */ + SERD_NTRIPLES = 2 /**< <http://www.w3.org/TR/rdf-testcases/#ntriples> */ } SerdSyntax; -/** Type of RDF node. */ +/** Type of a syntactic RDF node. + * This is more precise than the type of an abstract RDF node. An abstract + * node is either a resource, literal, or blank. In syntax there are two + * ways to refer to a resource (URI or CURIE), and two ways to refer to a + * blank node (with a blank ID, or anonymously). Serd represents nodes as + * an unquoted UTF-8 string "value" associated with a @ref SerdNodeType, + * which preserves syntactic information allowing for lossless abbreviation. + * A non-abbreviating sink may simply consider @ref SERD_ANON_BEGIN and + * @ref SERD_ANON equivalent to SERD_BLANK_ID. + */ typedef enum { - BLANK = 1, ///< Blank node (resource with no URI) - URI = 2, ///< URI (universal identifier) - QNAME = 3, ///< CURIE/QName (URI shortened with a namespace) - LITERAL = 4 ///< Literal string (with optional lang or datatype) + + /** Literal value. A literal optionally has either an associated language, + * or an associated datatype (not both). + */ + SERD_LITERAL = 1, + + /** URI. Value is a valid URI string (either absolute or relative), which + * is valid universally. See <http://tools.ietf.org/html/rfc3986>. + */ + SERD_URI = 2, + + /** CURIE, a shortened URI. Value is an unquoted UTF-8 CURIE string + * relative to the current environment, e.g. "rdf:type", which is valid + * only within this serialisation. See <http://www.w3.org/TR/curie>. + */ + SERD_CURIE = 3, + + /** A blank node ID. Value is a blank node identifier (e.g. "blank3"), + * which is valid only within this serialisation. + * See <http://www.w3.org/TeamSubmission/turtle#nodeID>. + */ + SERD_BLANK_ID = 4, + + /** The first reference to an anonymous (inlined) blank node. + * Value is identical to a @ref SERD_BLANK_ID value. + */ + SERD_ANON_BEGIN = 5, + + /** An anonymous blank node. + * Value is identical to a @ref SERD_BLANK_ID value. + */ + SERD_ANON = 6 + } SerdNodeType; /** @name SerdURI @@ -84,7 +122,7 @@ typedef struct { /** A parsed URI. * This struct directly refers to chunks in other strings, it does not own - * any memory itself. Thus, URIs can be parsed and/or resolved against a + * any memory itself. Thus, URIs can be parsed and/or resolved against a * base URI in-place without allocating memory. */ typedef struct { @@ -218,6 +256,14 @@ typedef bool (*SerdStatementSink)(void* handle, const SerdString* object_lang, const SerdString* object_datatype); +/** Sink for anonymous node end markers. + * This is called to indicate that the anonymous node with the given + * @a value will no longer be referred to by any future statements + * (i.e. the anonymous serialisation of the node is finished). + */ +typedef bool (*SerdEndSink)(void* handle, + const SerdString* value); + /** Create a new RDF reader. */ SERD_API SerdReader @@ -225,7 +271,8 @@ serd_reader_new(SerdSyntax syntax, void* handle, SerdBaseSink base_sink, SerdPrefixSink prefix_sink, - SerdStatementSink statement_sink); + SerdStatementSink statement_sink, + SerdEndSink end_sink); /** Read @a file. */ SERD_API @@ -247,8 +294,7 @@ serd_reader_free(SerdReader reader); typedef enum { SERD_STYLE_ABBREVIATED = 1, - SERD_STYLE_ASCII = 1 << 1, - SERD_STYLE_ESCAPE_ + SERD_STYLE_ASCII = 1 << 1 } SerdStyle; /** Create a new RDF writer. */ @@ -293,6 +339,12 @@ serd_writer_write_statement(SerdWriter writer, const SerdString* object_datatype, const SerdString* object_lang); +/** Mark the end of an anonymous node's description. */ +SERD_API +bool +serd_writer_end_anon(SerdWriter writer, + const SerdString* subject); + /** Finish a write. */ SERD_API void diff --git a/src/reader.c b/src/reader.c index ea941770..057775c8 100644 --- a/src/reader.c +++ b/src/reader.c @@ -24,12 +24,13 @@ #include <string.h> #include "serd/serd.h" +#include "serd_stack.h" #define TRY_THROW(exp) if (!(exp)) goto except; #define TRY_RET(exp) if (!(exp)) return 0; -#define READ_BUF_LEN 4096 -#define STACK_CHUNK_SIZE 4096 +#define STACK_PAGE_SIZE 4096 +#define READ_BUF_LEN 4096 #ifndef NDEBUG #define STACK_DEBUG 1 #endif @@ -40,12 +41,6 @@ typedef struct { unsigned col; } Cursor; -typedef struct { - uint8_t* buf; ///< Stack memory - size_t buf_size; ///< Allocated size of buf (>= size) - size_t size; ///< Conceptual size of stack in buf -} Stack; - typedef uint32_t uchar; typedef size_t Ref; @@ -61,7 +56,7 @@ typedef struct { const Node* graph; const Node* subject; const Node* predicate; -} Context; +} ReadContext; static const Node SERD_NODE_NULL = {0,0,0,0}; @@ -70,12 +65,13 @@ struct SerdReaderImpl { SerdBaseSink base_sink; SerdPrefixSink prefix_sink; SerdStatementSink statement_sink; + SerdEndSink end_sink; Node rdf_type; Node rdf_first; Node rdf_rest; Node rdf_nil; FILE* fd; - Stack stack; + SerdStack stack; Cursor cur; uint8_t* buf; unsigned next_id; @@ -201,19 +197,6 @@ stack_is_top_string(SerdReader reader, Ref ref) } #endif -static inline uint8_t* -stack_push(SerdReader reader, size_t n_bytes) -{ - const size_t new_size = reader->stack.size + n_bytes; - if (reader->stack.buf_size < new_size) { - reader->stack.buf_size = ((new_size / STACK_CHUNK_SIZE) + 1) * STACK_CHUNK_SIZE; - reader->stack.buf = realloc(reader->stack.buf, reader->stack.buf_size); - } - uint8_t* const ret = (reader->stack.buf + reader->stack.size); - reader->stack.size = new_size; - return ret; -} - static inline intptr_t pad_size(intptr_t size) { @@ -228,7 +211,7 @@ push_string(SerdReader reader, const char* c_str, size_t n_bytes) const size_t stack_size = pad_size((intptr_t)reader->stack.size); const size_t pad = stack_size - reader->stack.size; SerdString* const str = (SerdString*)( - stack_push(reader, pad + sizeof(SerdString) + n_bytes) + pad); + serd_stack_push(&reader->stack, pad + sizeof(SerdString) + n_bytes) + pad); str->n_bytes = n_bytes; str->n_chars = n_bytes - 1; memcpy(str->buf, c_str, n_bytes); @@ -254,7 +237,7 @@ push_byte(SerdReader reader, Ref ref, const uint8_t c) #ifdef STACK_DEBUG assert(stack_is_top_string(reader, ref)); #endif - stack_push(reader, 1); + serd_stack_push(&reader->stack, 1); SerdString* const str = deref(reader, ref); ++str->n_bytes; if ((c & 0xC0) != 0x80) { @@ -284,7 +267,7 @@ pop_string(SerdReader reader, Ref ref) assert(stack_is_top_string(reader, ref)); --reader->n_allocs; #endif - reader->stack.size -= deref(reader, ref)->n_bytes; + serd_stack_pop(&reader->stack, deref(reader, ref)->n_bytes); } } @@ -292,6 +275,7 @@ static inline void emit_statement(SerdReader reader, const Node* g, const Node* s, const Node* p, const Node* o) { + assert(s->value && p->value && o->value); reader->statement_sink(reader->handle, g ? deref(reader, g->value) : NULL, deref(reader, s->value), s->type, @@ -300,8 +284,8 @@ emit_statement(SerdReader reader, deref(reader, o->datatype), deref(reader, o->lang)); } -static bool read_collection(SerdReader reader, Context ctx, Node* dest); -static bool read_predicateObjectList(SerdReader reader, Context ctx); +static bool read_collection(SerdReader reader, ReadContext ctx, Node* dest); +static bool read_predicateObjectList(SerdReader reader, ReadContext ctx); // [40] hex ::= [#x30-#x39] | [#x41-#x46] static inline uint8_t @@ -882,7 +866,7 @@ read_number(SerdReader reader, Node* dest) } else { datatype = push_string(reader, XSD_INTEGER, strlen(XSD_INTEGER) + 1); } - *dest = make_node(LITERAL, str, datatype, 0); + *dest = make_node(SERD_LITERAL, str, datatype, 0); assert(dest->value); return true; except: @@ -897,10 +881,10 @@ read_resource(SerdReader reader, Node* dest) { switch (peek_byte(reader)) { case '<': - *dest = make_node(URI, read_uriref(reader), 0, 0); + *dest = make_node(SERD_URI, read_uriref(reader), 0, 0); break; default: - *dest = make_node(QNAME, read_qname(reader), 0, 0); + *dest = make_node(SERD_CURIE, read_qname(reader), 0, 0); } return (dest->value != 0); } @@ -932,9 +916,9 @@ read_literal(SerdReader reader, Node* dest) eat_byte(reader, '@'); TRY_THROW(lang = read_language(reader)); } - *dest = make_node(LITERAL, str, datatype.value, lang); + *dest = make_node(SERD_LITERAL, str, datatype.value, lang); } else { - *dest = make_node(QNAME, read_qname(reader), 0, 0); + *dest = make_node(SERD_CURIE, read_qname(reader), 0, 0); } return true; except: @@ -961,7 +945,7 @@ read_verb(SerdReader reader, Node* dest) switch (pre[1]) { case 0x9: case 0xA: case 0xD: case 0x20: eat_byte(reader, 'a'); - *dest = make_node(URI, push_string(reader, RDF_TYPE, 48), 0, 0); + *dest = make_node(SERD_URI, push_string(reader, RDF_TYPE, 48), 0, 0); return true; default: break; // fall through } @@ -991,30 +975,34 @@ blank_id(SerdReader reader) // Spec: [21] blank ::= nodeID | '[]' | '[' predicateObjectList ']' | collection // Actual: [21] blank ::= nodeID | '[ ws* ]' | '[' ws* predicateObjectList ws* ']' | collection static bool -read_blank(SerdReader reader, Context ctx, Node* dest) +read_blank(SerdReader reader, ReadContext ctx, Node* dest) { switch (peek_byte(reader)) { case '_': - *dest = make_node(BLANK, read_nodeID(reader), 0, 0); + *dest = make_node(SERD_BLANK_ID, read_nodeID(reader), 0, 0); return true; case '[': eat_byte(reader, '['); read_ws_star(reader); if (peek_byte(reader) == ']') { eat_byte(reader, ']'); - *dest = make_node(BLANK, blank_id(reader), 0, 0); + *dest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0); if (ctx.subject) { emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest); } - } else { - *dest = make_node(BLANK, blank_id(reader), 0, 0); - if (ctx.subject) { - emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest); - } - ctx.subject = dest; - read_predicateObjectList(reader, ctx); - read_ws_star(reader); - eat_byte(reader, ']'); + return true; + } + *dest = make_node(SERD_ANON_BEGIN, blank_id(reader), 0, 0); + if (ctx.subject) { + emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest); + dest->type = SERD_ANON; + } + ctx.subject = dest; + read_predicateObjectList(reader, ctx); + read_ws_star(reader); + eat_byte(reader, ']'); + if (reader->end_sink) { + reader->end_sink(reader->handle, deref(reader, dest->value)); } return true; case '(': @@ -1046,14 +1034,14 @@ is_object_end(const uint8_t c) // Recurses, calling statement_sink for every statement encountered. // Leaves stack in original calling state (i.e. pops everything it pushes). static bool -read_object(SerdReader reader, Context ctx) +read_object(SerdReader reader, ReadContext ctx) { static const char* const XSD_BOOLEAN = "http://www.w3.org/2001/XMLSchema#boolean"; static const size_t XSD_BOOLEAN_LEN = 40; uint8_t pre[6]; bool ret = false; - bool emit = true; + bool emit = (ctx.subject != 0); Node o = SERD_NODE_NULL; const uint8_t c = peek_byte(reader); switch (c) { @@ -1086,14 +1074,14 @@ read_object(SerdReader reader, Context ctx) eat_string(reader, "true", 4); const Ref value = push_string(reader, "true", 5); const Ref datatype = push_string(reader, XSD_BOOLEAN, XSD_BOOLEAN_LEN + 1); - o = make_node(LITERAL, value, datatype, 0); + o = make_node(SERD_LITERAL, value, datatype, 0); } else if (!memcmp(pre, "false", 5) && is_object_end(pre[5])) { eat_string(reader, "false", 5); const Ref value = push_string(reader, "false", 6); const Ref datatype = push_string(reader, XSD_BOOLEAN, XSD_BOOLEAN_LEN + 1); - o = make_node(LITERAL, value, datatype, 0); + o = make_node(SERD_LITERAL, value, datatype, 0); } else if (!is_object_end(c)) { - o = make_node(QNAME, read_qname(reader), 0, 0); + o = make_node(SERD_CURIE, read_qname(reader), 0, 0); } ret = o.value; } @@ -1113,7 +1101,7 @@ except: // Spec: [8] objectList ::= object ( ',' object )* // Actual: [8] objectList ::= object ( ws* ',' ws* object )* static bool -read_objectList(SerdReader reader, Context ctx) +read_objectList(SerdReader reader, ReadContext ctx) { TRY_RET(read_object(reader, ctx)); read_ws_star(reader); @@ -1129,7 +1117,7 @@ read_objectList(SerdReader reader, Context ctx) // Spec: [7] predicateObjectList ::= verb objectList ( ';' verb objectList )* ( ';' )? // Actual: [7] predicateObjectList ::= verb ws+ objectList ( ws* ';' ws* verb ws+ objectList )* ( ';' )? static bool -read_predicateObjectList(SerdReader reader, Context ctx) +read_predicateObjectList(SerdReader reader, ReadContext ctx) { if (reader->eof) { return false; @@ -1166,7 +1154,7 @@ except: /** Recursive helper for read_collection. */ static bool -read_collection_rec(SerdReader reader, Context ctx) +read_collection_rec(SerdReader reader, ReadContext ctx) { read_ws_star(reader); if (peek_byte(reader) == ')') { @@ -1174,7 +1162,7 @@ read_collection_rec(SerdReader reader, Context ctx) emit_statement(reader, NULL, ctx.subject, &reader->rdf_rest, &reader->rdf_nil); return false; } else { - const Node rest = make_node(BLANK, blank_id(reader), 0, 0); + const Node rest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0); emit_statement(reader, ctx.graph, ctx.subject, &reader->rdf_rest, &rest); ctx.subject = &rest; ctx.predicate = &reader->rdf_first; @@ -1192,7 +1180,7 @@ read_collection_rec(SerdReader reader, Context ctx) // [22] itemList ::= object+ // [23] collection ::= '(' itemList? ')' static bool -read_collection(SerdReader reader, Context ctx, Node* dest) +read_collection(SerdReader reader, ReadContext ctx, Node* dest) { TRY_RET(eat_byte(reader, '(')); read_ws_star(reader); @@ -1202,7 +1190,7 @@ read_collection(SerdReader reader, Context ctx, Node* dest) return true; } - *dest = make_node(BLANK, blank_id(reader), 0, 0); + *dest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0); ctx.subject = dest; ctx.predicate = &reader->rdf_first; if (!read_object(reader, ctx)) { @@ -1216,7 +1204,7 @@ read_collection(SerdReader reader, Context ctx, Node* dest) // [11] subject ::= resource | blank static Node -read_subject(SerdReader reader, Context ctx) +read_subject(SerdReader reader, ReadContext ctx) { Node subject = SERD_NODE_NULL; switch (peek_byte(reader)) { @@ -1232,17 +1220,18 @@ read_subject(SerdReader reader, Context ctx) // Spec: [6] triples ::= subject predicateObjectList // Actual: [6] triples ::= subject ws+ predicateObjectList static bool -read_triples(SerdReader reader, Context ctx) +read_triples(SerdReader reader, ReadContext ctx) { const Node subject = read_subject(reader, ctx); + bool ret = false; if (subject.value != 0) { ctx.subject = &subject; TRY_RET(read_ws_plus(reader)); - const bool ret = read_predicateObjectList(reader, ctx); + ret = read_predicateObjectList(reader, ctx); pop_string(reader, subject.value); - return ret; } - return false; + ctx.subject = ctx.predicate = 0; + return ret; } // [5] base ::= '@base' ws+ uriref @@ -1305,7 +1294,7 @@ read_directive(SerdReader reader) static bool read_statement(SerdReader reader) { - Context ctx = { 0, 0, 0 }; + ReadContext ctx = { 0, 0, 0 }; read_ws_star(reader); if (reader->eof) { return true; @@ -1338,7 +1327,8 @@ serd_reader_new(SerdSyntax syntax, void* handle, SerdBaseSink base_sink, SerdPrefixSink prefix_sink, - SerdStatementSink statement_sink) + SerdStatementSink statement_sink, + SerdEndSink end_sink) { const Cursor cur = { NULL, 0, 0 }; SerdReader reader = malloc(sizeof(struct SerdReaderImpl)); @@ -1346,10 +1336,9 @@ serd_reader_new(SerdSyntax syntax, reader->base_sink = base_sink; reader->prefix_sink = prefix_sink; reader->statement_sink = statement_sink; + reader->end_sink = end_sink; reader->fd = 0; - reader->stack.buf = malloc(STACK_CHUNK_SIZE); - reader->stack.buf_size = STACK_CHUNK_SIZE; - reader->stack.size = 8; + reader->stack = serd_stack_new(STACK_PAGE_SIZE); reader->cur = cur; reader->next_id = 1; reader->err = 0; @@ -1396,9 +1385,9 @@ serd_reader_read_file(SerdReader reader, FILE* file, const uint8_t* name) me->fd = file; me->cur = cur; - me->rdf_first = make_node(URI, push_string(me, RDF_FIRST, 49), 0, 0); - me->rdf_rest = make_node(URI, push_string(me, RDF_REST, 48), 0, 0); - me->rdf_nil = make_node(URI, push_string(me, RDF_NIL, 47), 0, 0); + me->rdf_first = make_node(SERD_URI, push_string(me, RDF_FIRST, 49), 0, 0); + me->rdf_rest = make_node(SERD_URI, push_string(me, RDF_REST, 48), 0, 0); + me->rdf_nil = make_node(SERD_URI, push_string(me, RDF_NIL, 47), 0, 0); fread(me->read_buf, 1, READ_BUF_LEN, file); const bool ret = read_turtleDoc(me); diff --git a/src/serd_stack.h b/src/serd_stack.h new file mode 100644 index 00000000..95289e64 --- /dev/null +++ b/src/serd_stack.h @@ -0,0 +1,79 @@ +/* Serd, an RDF serialisation library. + * Copyright 2011 David Robillard <d@drobilla.net> + * + * Serd is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Serd is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef SERD_STACK_H +#define SERD_STACK_H + +#include <assert.h> + +#include "serd/serd.h" + +/** An offset to start the stack at. Note 0 is reserved for NULL. */ +#define SERD_STACK_BOTTOM sizeof(void*) + +typedef struct { + uint8_t* buf; ///< Stack memory + size_t buf_size; ///< Allocated size of buf (>= size) + size_t size; ///< Conceptual size of stack in buf +} SerdStack; + +static inline SerdStack +serd_stack_new(size_t size) +{ + SerdStack stack; + stack.buf = malloc(size); + stack.buf_size = size; + stack.size = SERD_STACK_BOTTOM; + return stack; +} + +static inline bool +serd_stack_is_empty(SerdStack* stack) +{ + return stack->size <= SERD_STACK_BOTTOM; +} + +static inline void +serd_stack_free(SerdStack* stack) +{ + free(stack->buf); + stack->buf = NULL; + stack->buf_size = 0; + stack->size = 0; +} + +static inline uint8_t* +serd_stack_push(SerdStack* stack, size_t n_bytes) +{ + const size_t new_size = stack->size + n_bytes; + if (stack->buf_size < new_size) { + stack->buf_size *= 2; + stack->buf = realloc(stack->buf, stack->buf_size); + } + uint8_t* const ret = (stack->buf + stack->size); + stack->size = new_size; + return ret; +} + +static inline void +serd_stack_pop(SerdStack* stack, size_t n_bytes) +{ + assert(stack->size >= n_bytes); + stack->size -= n_bytes; +} + +#endif // SERD_STACK_H diff --git a/src/serdi.c b/src/serdi.c index 73653792..2aa0a08e 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -115,6 +115,14 @@ event_statement(void* handle, object, object_type, object_datatype, object_lang); } +static bool +event_end(void* handle, + const SerdString* subject) +{ + State* const state = (State*)handle; + return serd_writer_end_anon(state->writer, subject); +} + int print_usage(const char* name, bool error) { @@ -218,7 +226,7 @@ main(int argc, char** argv) }; SerdReader reader = serd_reader_new( - SERD_TURTLE, &state, event_base, event_prefix, event_statement); + SERD_TURTLE, &state, event_base, event_prefix, event_statement, event_end); const bool success = serd_reader_read_file(reader, in_fd, in_filename); serd_reader_free(reader); diff --git a/src/writer.c b/src/writer.c index 1e50b7b0..3e91bcd2 100644 --- a/src/writer.c +++ b/src/writer.c @@ -20,6 +20,15 @@ #include <stdlib.h> #include "serd/serd.h" +#include "serd_stack.h" + +typedef struct { + const SerdString* graph; + const SerdString* subject; + const SerdString* predicate; +} WriteContext; + +static const WriteContext WRITE_CONTEXT_NULL = { 0, 0, 0 }; typedef bool (*StatementWriter)(SerdWriter writer, const SerdString* graph, @@ -43,13 +52,12 @@ struct SerdWriterImpl { SerdStyle style; SerdEnv env; SerdURI base_uri; + SerdStack anon_stack; SerdSink sink; void* stream; StatementWriter write_statement; NodeWriter write_node; - const SerdString* prev_g; - const SerdString* prev_s; - const SerdString* prev_p; + WriteContext context; unsigned indent; }; @@ -57,10 +65,18 @@ typedef enum { WRITE_NORMAL, WRITE_URI, WRITE_STRING -} WriteContext; +} TextContext; + +static inline WriteContext* +anon_stack_top(SerdWriter writer) +{ + assert(!serd_stack_is_empty(&writer->anon_stack)); + return (WriteContext*)(writer->anon_stack.buf + + writer->anon_stack.size - sizeof(WriteContext)); +} static bool -write_text(SerdWriter writer, WriteContext ctx, +write_text(SerdWriter writer, TextContext ctx, const uint8_t* utf8, size_t n_bytes, uint8_t terminator) { char escape[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; @@ -147,6 +163,24 @@ write_text(SerdWriter writer, WriteContext ctx, return true; } +static void +serd_writer_write_delim(SerdWriter writer, const uint8_t delim) +{ + switch (delim) { + case 0: + case '\n': + break; + default: + writer->sink(" ", 1, writer->stream); + case '[': + writer->sink(&delim, 1, writer->stream); + } + writer->sink("\n", 1, writer->stream); + for (unsigned i = 0; i < writer->indent; ++i) { + writer->sink("\t", 1, writer->stream); + } +} + static bool write_node(SerdWriter writer, SerdNodeType type, @@ -157,11 +191,26 @@ write_node(SerdWriter writer, SerdChunk uri_prefix; SerdChunk uri_suffix; switch (type) { - case BLANK: + case SERD_ANON_BEGIN: + if (writer->syntax != SERD_NTRIPLES) { + ++writer->indent; + serd_writer_write_delim(writer, '['); + WriteContext* ctx = (WriteContext*)serd_stack_push( + &writer->anon_stack, sizeof(WriteContext)); + *ctx = writer->context; + writer->context.subject = str; + writer->context.predicate = 0; + break; + } + case SERD_ANON: + if (writer->syntax != SERD_NTRIPLES) { + break; + } // else fall through + case SERD_BLANK_ID: writer->sink("_:", 2, writer->stream); writer->sink(str->buf, str->n_bytes - 1, writer->stream); break; - case QNAME: + case SERD_CURIE: switch (writer->syntax) { case SERD_NTRIPLES: if (!serd_env_expand(writer->env, str, &uri_prefix, &uri_suffix)) { @@ -177,7 +226,19 @@ write_node(SerdWriter writer, writer->sink(str->buf, str->n_bytes - 1, writer->stream); } break; - case URI: + case SERD_LITERAL: + writer->sink("\"", 1, writer->stream); + write_text(writer, WRITE_STRING, str->buf, str->n_bytes - 1, '"'); + writer->sink("\"", 1, writer->stream); + if (lang) { + writer->sink("@", 1, writer->stream); + writer->sink(lang->buf, lang->n_bytes - 1, writer->stream); + } else if (datatype) { + writer->sink("^^", 2, writer->stream); + write_node(writer, SERD_URI, datatype, NULL, NULL); + } + break; + case SERD_URI: if (!serd_uri_string_has_scheme(str->buf)) { SerdURI uri; if (serd_uri_parse(str->buf, &uri)) { @@ -196,39 +257,10 @@ write_node(SerdWriter writer, return true; } return false; - case LITERAL: - writer->sink("\"", 1, writer->stream); - write_text(writer, WRITE_STRING, str->buf, str->n_bytes - 1, '"'); - writer->sink("\"", 1, writer->stream); - if (lang) { - writer->sink("@", 1, writer->stream); - writer->sink(lang->buf, lang->n_bytes - 1, writer->stream); - } else if (datatype) { - writer->sink("^^", 2, writer->stream); - write_node(writer, URI, datatype, NULL, NULL); - } - break; } return true; } -static void -serd_writer_write_delim(SerdWriter writer, const uint8_t delim) -{ - switch (delim) { - case 0: - case '\n': - break; - default: - writer->sink(" ", 1, writer->stream); - writer->sink(&delim, 1, writer->stream); - } - writer->sink("\n", 1, writer->stream); - for (unsigned i = 0; i < writer->indent; ++i) { - writer->sink("\t", 1, writer->stream); - } -} - SERD_API bool serd_writer_write_statement(SerdWriter writer, @@ -262,38 +294,63 @@ serd_writer_write_statement_abbrev(SerdWriter writer, const SerdString* object_lang) { assert(subject && predicate && object); - if (subject == writer->prev_s) { - if (predicate == writer->prev_p) { + if (subject == writer->context.subject) { + if (predicate == writer->context.predicate) { ++writer->indent; serd_writer_write_delim(writer, ','); write_node(writer, object_type, object, object_datatype, object_lang); --writer->indent; } else { - serd_writer_write_delim(writer, ';'); + if (writer->context.predicate) { + serd_writer_write_delim(writer, ';'); + } else { + ++writer->indent; + serd_writer_write_delim(writer, '\n'); + } write_node(writer, predicate_type, predicate, NULL, NULL); + writer->context.predicate = predicate; writer->sink(" ", 1, writer->stream); write_node(writer, object_type, object, object_datatype, object_lang); } } else { - if (writer->prev_s) { - --writer->indent; - serd_writer_write_delim(writer, '.'); - serd_writer_write_delim(writer, '\n'); + if (writer->context.subject) { + if (writer->indent > 0) { + --writer->indent; + } + if (serd_stack_is_empty(&writer->anon_stack)) { + serd_writer_write_delim(writer, '.'); + serd_writer_write_delim(writer, '\n'); + } } - write_node(writer, subject_type, subject, NULL, NULL); - ++writer->indent; - serd_writer_write_delim(writer, 0); - writer->sink(" ", 1, writer->stream); + if (subject_type == SERD_ANON_BEGIN) { + writer->sink("[ ", 2, writer->stream); + ++writer->indent; + WriteContext* ctx = (WriteContext*)serd_stack_push( + &writer->anon_stack, sizeof(WriteContext)); + *ctx = writer->context; + writer->context.subject = subject; + writer->context.predicate = 0; + } else { + write_node(writer, subject_type, subject, NULL, NULL); + ++writer->indent; + if (subject_type != SERD_ANON_BEGIN && subject_type != SERD_ANON) { + serd_writer_write_delim(writer, '\n'); + } + } + + writer->context.subject = subject; + writer->context.predicate = 0; + write_node(writer, predicate_type, predicate, NULL, NULL); + writer->context.predicate = predicate; writer->sink(" ", 1, writer->stream); write_node(writer, object_type, object, object_datatype, object_lang); } - writer->prev_g = graph; - writer->prev_s = subject; - writer->prev_p = predicate; + const WriteContext new_context = { graph, subject, predicate }; + writer->context = new_context; return true; } @@ -321,10 +378,36 @@ serd_writer_write_statement_flat(SerdWriter writer, } SERD_API +bool +serd_writer_end_anon(SerdWriter writer, + const SerdString* subject) +{ + if (writer->syntax == SERD_NTRIPLES) { + return true; + } + if (serd_stack_is_empty(&writer->anon_stack)) { + fprintf(stderr, "unexpected SERD_END received\n"); + return false; + } + assert(writer->indent > 0); + --writer->indent; + serd_writer_write_delim(writer, '\n'); + writer->sink("]", 1, writer->stream); + writer->context = *anon_stack_top(writer); + serd_stack_pop(&writer->anon_stack, sizeof(WriteContext)); + if (serd_stack_is_empty(&writer->anon_stack)) { + // End of anonymous subject, reset context + writer->context.subject = subject; + writer->context.predicate = 0; + } + return true; +} + +SERD_API void serd_writer_finish(SerdWriter writer) { - if (writer->prev_s) { + if (writer->context.subject) { writer->sink(" .\n", 3, writer->stream); } } @@ -338,17 +421,17 @@ serd_writer_new(SerdSyntax syntax, SerdSink sink, void* stream) { - SerdWriter writer = malloc(sizeof(struct SerdWriterImpl)); - writer->syntax = syntax; - writer->style = style; - writer->env = env; - writer->base_uri = *base_uri; - writer->sink = sink; - writer->stream = stream; - writer->prev_g = 0; - writer->prev_s = 0; - writer->prev_p = 0; - writer->indent = 0; + const WriteContext context = WRITE_CONTEXT_NULL; + SerdWriter writer = malloc(sizeof(struct SerdWriterImpl)); + writer->syntax = syntax; + writer->style = style; + writer->env = env; + writer->base_uri = *base_uri; + writer->anon_stack = serd_stack_new(sizeof(WriteContext)); + writer->sink = sink; + writer->stream = stream; + writer->context = context; + writer->indent = 0; writer->write_node = write_node; if ((style & SERD_STYLE_ABBREVIATED)) { writer->write_statement = serd_writer_write_statement_abbrev; @@ -365,9 +448,9 @@ serd_writer_set_base_uri(SerdWriter writer, { writer->base_uri = *uri; if (writer->syntax != SERD_NTRIPLES) { - if (writer->prev_g || writer->prev_s) { + if (writer->context.graph || writer->context.subject) { writer->sink(" .\n\n", 4, writer->stream); - writer->prev_g = writer->prev_s = writer->prev_p = 0; + writer->context = WRITE_CONTEXT_NULL; } writer->sink("@base ", 6, writer->stream); writer->sink(" <", 2, writer->stream); @@ -383,11 +466,9 @@ serd_writer_set_prefix(SerdWriter writer, const SerdString* uri) { if (writer->syntax != SERD_NTRIPLES) { - if (writer->prev_g || writer->prev_s) { + if (writer->context.graph || writer->context.subject) { writer->sink(" .\n\n", 4, writer->stream); - writer->prev_g = 0; - writer->prev_s = 0; - writer->prev_p = 0; + writer->context = WRITE_CONTEXT_NULL; } writer->sink("@prefix ", 8, writer->stream); writer->sink(name->buf, name->n_bytes - 1, writer->stream); @@ -402,5 +483,6 @@ void serd_writer_free(SerdWriter writer) { SerdWriter const me = (SerdWriter)writer; + serd_stack_free(&writer->anon_stack); free(me); } |