diff options
Diffstat (limited to 'src/writer.c')
-rw-r--r-- | src/writer.c | 994 |
1 files changed, 994 insertions, 0 deletions
diff --git a/src/writer.c b/src/writer.c new file mode 100644 index 00000000..69a00d82 --- /dev/null +++ b/src/writer.c @@ -0,0 +1,994 @@ +/* + Copyright 2011-2017 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "serd_internal.h" + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +typedef struct { + SerdNode graph; + SerdNode subject; + SerdNode predicate; +} WriteContext; + +static const WriteContext WRITE_CONTEXT_NULL = { + { 0, 0, 0, 0, SERD_NOTHING }, + { 0, 0, 0, 0, SERD_NOTHING }, + { 0, 0, 0, 0, SERD_NOTHING } +}; + +typedef enum { + SEP_NONE, + SEP_END_S, ///< End of a subject ('.') + SEP_END_P, ///< End of a predicate (';') + SEP_END_O, ///< End of an object (',') + SEP_S_P, ///< Between a subject and predicate (whitespace) + SEP_P_O, ///< Between a predicate and object (whitespace) + SEP_ANON_BEGIN, ///< Start of anonymous node ('[') + SEP_ANON_END, ///< End of anonymous node (']') + SEP_LIST_BEGIN, ///< Start of list ('(') + SEP_LIST_SEP, ///< List separator (whitespace) + SEP_LIST_END, ///< End of list (')') + SEP_GRAPH_BEGIN, ///< Start of graph ('{') + SEP_GRAPH_END, ///< End of graph ('}') + SEP_URI_BEGIN, ///< URI start quote ('<') + SEP_URI_END ///< URI end quote ('>') +} Sep; + +typedef struct { + const char* str; ///< Sep string + uint8_t len; ///< Length of sep string + uint8_t space_before; ///< Newline before sep + uint8_t space_after_node; ///< Newline after sep if after node + uint8_t space_after_sep; ///< Newline after sep if after sep +} SepRule; + +static const SepRule rules[] = { + { NULL, 0, 0, 0, 0 }, + { " .\n\n", 4, 0, 0, 0 }, + { " ;", 2, 0, 1, 1 }, + { " ,", 2, 0, 1, 0 }, + { NULL, 0, 0, 1, 0 }, + { " ", 1, 0, 0, 0 }, + { "[", 1, 0, 1, 1 }, + { "]", 1, 1, 0, 0 }, + { "(", 1, 0, 0, 0 }, + { NULL, 1, 0, 1, 0 }, + { ")", 1, 1, 0, 0 }, + { " {", 2, 0, 1, 1 }, + { " }", 2, 0, 1, 1 }, + { "<", 1, 0, 0, 0 }, + { ">", 1, 0, 0, 0 }, + { "\n", 1, 0, 1, 0 } +}; + +struct SerdWriterImpl { + SerdSyntax syntax; + SerdStyle style; + SerdEnv* env; + SerdNode root_node; + SerdURI root_uri; + SerdURI base_uri; + SerdStack anon_stack; + SerdByteSink byte_sink; + SerdErrorSink error_sink; + void* error_handle; + WriteContext context; + SerdNode list_subj; + unsigned list_depth; + unsigned indent; + uint8_t* bprefix; + size_t bprefix_len; + Sep last_sep; + bool empty; +}; + +typedef enum { + WRITE_STRING, + WRITE_LONG_STRING +} TextContext; + +static bool +write_node(SerdWriter* writer, + const SerdNode* node, + const SerdNode* datatype, + const SerdNode* lang, + Field field, + SerdStatementFlags flags); + +static bool +supports_abbrev(const SerdWriter* writer) +{ + return writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG; +} + +static void +w_err(SerdWriter* writer, SerdStatus st, const char* fmt, ...) +{ + /* TODO: This results in errors with no file information, which is not + helpful when re-serializing a file (particularly for "undefined + namespace prefix" errors. The statement sink API needs to be changed to + add a Cursor parameter so the source can notify the writer of the + statement origin for better error reporting. */ + + va_list args; + va_start(args, fmt); + const SerdError e = { st, NULL, 0, 0, fmt, &args }; + serd_error(writer->error_sink, writer->error_handle, &e); + va_end(args); +} + +static inline WriteContext* +anon_stack_top(SerdWriter* writer) +{ + assert(!serd_stack_is_empty(&writer->anon_stack)); + return (WriteContext*)(writer->anon_stack.buf + + writer->anon_stack.size - sizeof(WriteContext)); +} + +static void +copy_node(SerdNode* dst, const SerdNode* src) +{ + if (src) { + dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1); + dst->n_bytes = src->n_bytes; + dst->n_chars = src->n_chars; + dst->flags = src->flags; + dst->type = src->type; + memcpy((char*)dst->buf, src->buf, src->n_bytes + 1); + } else { + dst->type = SERD_NOTHING; + } +} + +static inline size_t +sink(const void* buf, size_t len, SerdWriter* writer) +{ + return serd_byte_sink_write(buf, len, &writer->byte_sink); +} + +// Write a single character, as an escape for single byte characters +// (Caller prints any single byte characters that don't need escaping) +static size_t +write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size) +{ + char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + const uint32_t c = parse_utf8_char(utf8, size); + switch (*size) { + case 0: + w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]); + return sink(replacement_char, sizeof(replacement_char), writer); + case 1: + snprintf(escape, sizeof(escape), "\\u%04X", utf8[0]); + return sink(escape, 6, writer); + default: + break; + } + + if (!(writer->style & SERD_STYLE_ASCII)) { + // Write UTF-8 character directly to UTF-8 output + return sink(utf8, *size, writer); + } + + if (c <= 0xFFFF) { + snprintf(escape, sizeof(escape), "\\u%04X", c); + return sink(escape, 6, writer); + } else { + snprintf(escape, sizeof(escape), "\\U%08X", c); + return sink(escape, 10, writer); + } +} + +static inline bool +uri_must_escape(const uint8_t c) +{ + switch (c) { + case ' ': case '"': case '<': case '>': case '\\': + case '^': case '`': case '{': case '|': case '}': + return true; + default: + return !in_range(c, 0x20, 0x7E); + } +} + +static size_t +write_uri(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes) +{ + size_t len = 0; + for (size_t i = 0; i < n_bytes;) { + size_t j = i; // Index of next character that must be escaped + for (; j < n_bytes; ++j) { + if (uri_must_escape(utf8[j])) { + break; + } + } + + // Bulk write all characters up to this special one + len += sink(&utf8[i], j - i, writer); + if ((i = j) == n_bytes) { + break; // Reached end + } + + // Write UTF-8 character + size_t size = 0; + len += write_character(writer, utf8 + i, &size); + i += size; + if (size == 0) { + // Corrupt input, scan to start of next character + for (++i; i < n_bytes && (utf8[i] & 0x80); ++i) {} + } + } + return len; +} + +static bool +lname_must_escape(const uint8_t c) +{ + /* This arbitrary list of characters, most of which have nothing to do with + Turtle, must be handled as special cases here because the RDF and SPARQL + WGs are apparently intent on making the once elegant Turtle a baroque + and inconsistent mess, throwing elegance and extensibility completely + out the window for no good reason. + + Note '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid unescaped + in local names, so they are not escaped here. */ + + switch (c) { + case '\'': case '!': case '#': case '$': case '%': case '&': + case '(': case ')': case '*': case '+': case ',': case '/': + case ';': case '=': case '?': case '@': case '~': + return true; + } + return false; +} + +static size_t +write_lname(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes) +{ + size_t len = 0; + for (size_t i = 0; i < n_bytes; ++i) { + size_t j = i; // Index of next character that must be escaped + for (; j < n_bytes; ++j) { + if (lname_must_escape(utf8[j])) { + break; + } + } + + // Bulk write all characters up to this special one + len += sink(&utf8[i], j - i, writer); + if ((i = j) == n_bytes) { + break; // Reached end + } + + // Write escape + len += sink("\\", 1, writer); + len += sink(&utf8[i], 1, writer); + } + return len; +} + +static size_t +write_text(SerdWriter* writer, TextContext ctx, + const uint8_t* utf8, size_t n_bytes) +{ + size_t len = 0; + for (size_t i = 0; i < n_bytes;) { + // Fast bulk write for long strings of printable ASCII + size_t j = i; + for (; j < n_bytes; ++j) { + if (utf8[j] == '\\' || utf8[j] == '"' + || (!in_range(utf8[j], 0x20, 0x7E))) { + break; + } + } + + len += sink(&utf8[i], j - i, writer); + if ((i = j) == n_bytes) { + break; // Reached end + } + + const uint8_t in = utf8[i++]; + if (ctx == WRITE_LONG_STRING) { + switch (in) { + case '\\': len += sink("\\\\", 2, writer); continue; + case '\b': len += sink("\\b", 2, writer); continue; + case '\n': case '\r': case '\t': case '\f': + len += sink(&in, 1, writer); // Write character as-is + continue; + case '\"': + if (i == n_bytes) { // '"' at string end + len += sink("\\\"", 2, writer); + } else { + len += sink(&in, 1, writer); + } + continue; + default: break; + } + } else if (ctx == WRITE_STRING) { + switch (in) { + case '\\': len += sink("\\\\", 2, writer); continue; + case '\n': len += sink("\\n", 2, writer); continue; + case '\r': len += sink("\\r", 2, writer); continue; + case '\t': len += sink("\\t", 2, writer); continue; + case '"': len += sink("\\\"", 2, writer); continue; + default: break; + } + if (writer->syntax == SERD_TURTLE) { + switch (in) { + case '\b': len += sink("\\b", 2, writer); continue; + case '\f': len += sink("\\f", 2, writer); continue; + } + } + } + + // Write UTF-8 character + size_t size = 0; + len += write_character(writer, utf8 + i - 1, &size); + if (size == 0) { + // Corrupt input, scan to start of next character + for (; i < n_bytes && (utf8[i] & 0x80); ++i) {} + } else { + i += size - 1; + } + } + return len; +} + +static size_t +uri_sink(const void* buf, size_t len, void* stream) +{ + return write_uri((SerdWriter*)stream, (const uint8_t*)buf, len); +} + +static void +write_newline(SerdWriter* writer) +{ + sink("\n", 1, writer); + for (unsigned i = 0; i < writer->indent; ++i) { + sink("\t", 1, writer); + } +} + +static bool +write_sep(SerdWriter* writer, const Sep sep) +{ + const SepRule* rule = &rules[sep]; + if (rule->space_before) { + write_newline(writer); + } + if (rule->str) { + sink(rule->str, rule->len, writer); + } + if ((writer->last_sep && rule->space_after_sep) || + (!writer->last_sep && rule->space_after_node)) { + write_newline(writer); + } else if (writer->last_sep && rule->space_after_node) { + sink(" ", 1, writer); + } + writer->last_sep = sep; + return true; +} + +static SerdStatus +reset_context(SerdWriter* writer, bool graph) +{ + if (graph) { + writer->context.graph.type = SERD_NOTHING; + } + writer->context.subject.type = SERD_NOTHING; + writer->context.predicate.type = SERD_NOTHING; + writer->empty = false; + return SERD_SUCCESS; +} + +static SerdStatus +free_context(SerdWriter* writer) +{ + serd_node_free(&writer->context.graph); + serd_node_free(&writer->context.subject); + serd_node_free(&writer->context.predicate); + return reset_context(writer, true); +} + +static bool +is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags) +{ + return (supports_abbrev(writer) && + ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) || + (field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN)))); +} + +static bool +write_literal(SerdWriter* writer, + const SerdNode* node, + const SerdNode* datatype, + const SerdNode* lang, + Field field, + SerdStatementFlags flags) +{ + if (supports_abbrev(writer) && datatype && datatype->buf) { + const char* type_uri = (const char*)datatype->buf; + if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && ( + !strcmp(type_uri + sizeof(NS_XSD) - 1, "boolean") || + !strcmp(type_uri + sizeof(NS_XSD) - 1, "integer"))) { + sink(node->buf, node->n_bytes, writer); + return true; + } else if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && + !strcmp(type_uri + sizeof(NS_XSD) - 1, "decimal") && + strchr((const char*)node->buf, '.') && + node->buf[node->n_bytes - 1] != '.') { + /* xsd:decimal literals without trailing digits, e.g. "5.", can + not be written bare in Turtle. We could add a 0 which is + prettier, but changes the text and breaks round tripping. + */ + sink(node->buf, node->n_bytes, writer); + return true; + } + } + + if (supports_abbrev(writer) + && (node->flags & (SERD_HAS_NEWLINE|SERD_HAS_QUOTE))) { + sink("\"\"\"", 3, writer); + write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes); + sink("\"\"\"", 3, writer); + } else { + sink("\"", 1, writer); + write_text(writer, WRITE_STRING, node->buf, node->n_bytes); + sink("\"", 1, writer); + } + if (lang && lang->buf) { + sink("@", 1, writer); + sink(lang->buf, lang->n_bytes, writer); + } else if (datatype && datatype->buf) { + sink("^^", 2, writer); + return write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags); + } + return true; +} + +// Return true iff `buf` is a valid prefixed name suffix +static inline bool +is_name(const uint8_t* buf, const size_t len) +{ + // TODO: This is more strict than it should be. + for (size_t i = 0; i < len; ++i) { + if (!(is_alpha(buf[i]) || is_digit(buf[i]))) { + return false; + } + } + return true; +} + +static bool +write_uri_node(SerdWriter* const writer, + const SerdNode* node, + const Field field, + const SerdStatementFlags flags) +{ + SerdNode prefix; + SerdChunk suffix; + + if (is_inline_start(writer, field, flags)) { + ++writer->indent; + write_sep(writer, SEP_ANON_BEGIN); + sink("== ", 3, writer); + } + + const bool has_scheme = serd_uri_string_has_scheme(node->buf); + if (field == FIELD_PREDICATE && supports_abbrev(writer) + && !strcmp((const char*)node->buf, NS_RDF "type")) { + return sink("a", 1, writer) == 1; + } else if (supports_abbrev(writer) + && !strcmp((const char*)node->buf, NS_RDF "nil")) { + return sink("()", 2, writer) == 2; + } else if (has_scheme && (writer->style & SERD_STYLE_CURIED) && + serd_env_qualify(writer->env, node, &prefix, &suffix) && + is_name(suffix.buf, suffix.len)) { + write_uri(writer, prefix.buf, prefix.n_bytes); + sink(":", 1, writer); + write_uri(writer, suffix.buf, suffix.len); + return true; + } + + write_sep(writer, SEP_URI_BEGIN); + if (writer->style & SERD_STYLE_RESOLVED) { + SerdURI in_base_uri, uri, abs_uri; + serd_env_get_base_uri(writer->env, &in_base_uri); + serd_uri_parse(node->buf, &uri); + serd_uri_resolve(&uri, &in_base_uri, &abs_uri); + bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri); + SerdURI* root = rooted ? &writer->root_uri : & writer->base_uri; + if (!uri_is_under(&abs_uri, root) || + writer->syntax == SERD_NTRIPLES || + writer->syntax == SERD_NQUADS) { + serd_uri_serialise(&abs_uri, uri_sink, writer); + } else { + serd_uri_serialise_relative( + &uri, &writer->base_uri, root, uri_sink, writer); + } + } else { + write_uri(writer, node->buf, node->n_bytes); + } + write_sep(writer, SEP_URI_END); + if (is_inline_start(writer, field, flags)) { + sink(" ;", 2, writer); + write_newline(writer); + } + return true; +} + +static bool +write_curie(SerdWriter* const writer, + const SerdNode* node, + const Field field, + const SerdStatementFlags flags) +{ + SerdChunk prefix; + SerdChunk suffix; + SerdStatus st; + switch (writer->syntax) { + case SERD_NTRIPLES: + case SERD_NQUADS: + if ((st = serd_env_expand(writer->env, node, &prefix, &suffix))) { + w_err(writer, st, "undefined namespace prefix `%s'\n", node->buf); + return false; + } + write_sep(writer, SEP_URI_BEGIN); + write_uri(writer, prefix.buf, prefix.len); + write_uri(writer, suffix.buf, suffix.len); + write_sep(writer, SEP_URI_END); + break; + case SERD_TURTLE: + case SERD_TRIG: + if (is_inline_start(writer, field, flags)) { + ++writer->indent; + write_sep(writer, SEP_ANON_BEGIN); + sink("== ", 3, writer); + } + write_lname(writer, node->buf, node->n_bytes); + if (is_inline_start(writer, field, flags)) { + sink(" ;", 2, writer); + write_newline(writer); + } + } + return true; +} + +static bool +write_blank(SerdWriter* const writer, + const SerdNode* node, + const Field field, + const SerdStatementFlags flags) +{ + if (supports_abbrev(writer)) { + if (is_inline_start(writer, field, flags)) { + ++writer->indent; + return write_sep(writer, SEP_ANON_BEGIN); + } else if (field == FIELD_SUBJECT && (flags & SERD_LIST_S_BEGIN)) { + assert(writer->list_depth == 0); + copy_node(&writer->list_subj, node); + ++writer->list_depth; + ++writer->indent; + return write_sep(writer, SEP_LIST_BEGIN); + } else if (field == FIELD_OBJECT && (flags & SERD_LIST_O_BEGIN)) { + ++writer->indent; + ++writer->list_depth; + return write_sep(writer, SEP_LIST_BEGIN); + } else if ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S)) || + (field == FIELD_OBJECT && (flags & SERD_EMPTY_O))) { + return sink("[]", 2, writer) == 2; + } + } + + sink("_:", 2, writer); + if (writer->bprefix && !strncmp((const char*)node->buf, + (const char*)writer->bprefix, + writer->bprefix_len)) { + sink(node->buf + writer->bprefix_len, + node->n_bytes - writer->bprefix_len, + writer); + } else { + sink(node->buf, node->n_bytes, writer); + } + + return true; +} + +static bool +write_node(SerdWriter* writer, + const SerdNode* node, + const SerdNode* datatype, + const SerdNode* lang, + Field field, + SerdStatementFlags flags) +{ + bool ret = false; + switch (node->type) { + case SERD_LITERAL: + ret = write_literal(writer, node, datatype, lang, field, flags); + break; + case SERD_URI: + ret = write_uri_node(writer, node, field, flags); + break; + case SERD_CURIE: + ret = write_curie(writer, node, field, flags); + break; + case SERD_BLANK: + ret = write_blank(writer, node, field, flags); + default: break; + } + writer->last_sep = SEP_NONE; + return ret; +} + +static inline bool +is_resource(const SerdNode* node) +{ + return node->type > SERD_LITERAL; +} + +static void +write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred) +{ + write_node(writer, pred, NULL, NULL, FIELD_PREDICATE, flags); + write_sep(writer, SEP_P_O); + copy_node(&writer->context.predicate, pred); +} + +static bool +write_list_obj(SerdWriter* writer, + SerdStatementFlags flags, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* datatype, + const SerdNode* lang) +{ + if (!strcmp((const char*)object->buf, NS_RDF "nil")) { + --writer->indent; + write_sep(writer, SEP_LIST_END); + return true; + } else if (!strcmp((const char*)predicate->buf, NS_RDF "first")) { + write_sep(writer, SEP_LIST_SEP); + write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + } + return false; +} + +SerdStatus +serd_writer_write_statement(SerdWriter* writer, + SerdStatementFlags flags, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* datatype, + const SerdNode* lang) +{ + if (!subject || !predicate || !object + || !subject->buf || !predicate->buf || !object->buf + || !is_resource(subject) || !is_resource(predicate)) { + return SERD_ERR_BAD_ARG; + } + +#define TRY(write_result) \ + if (!(write_result)) { \ + return SERD_ERR_UNKNOWN; \ + } + + switch (writer->syntax) { + case SERD_NTRIPLES: + case SERD_NQUADS: + TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags)); + sink(" ", 1, writer); + TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags)); + sink(" ", 1, writer); + TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags)); + if (writer->syntax == SERD_NQUADS && graph) { + sink(" ", 1, writer); + TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); + } + sink(" .\n", 3, writer); + return SERD_SUCCESS; + default: + break; + } + + if ((graph && !serd_node_equals(graph, &writer->context.graph)) || + (!graph && writer->context.graph.type)) { + writer->indent = 0; + if (writer->context.subject.type) { + write_sep(writer, SEP_END_S); + } + if (writer->context.graph.type) { + write_sep(writer, SEP_GRAPH_END); + } + + reset_context(writer, true); + if (graph) { + TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); + ++writer->indent; + write_sep(writer, SEP_GRAPH_BEGIN); + copy_node(&writer->context.graph, graph); + } + } + + if ((flags & SERD_LIST_CONT)) { + if (write_list_obj(writer, flags, predicate, object, datatype, lang)) { + // Reached end of list + if (--writer->list_depth == 0 && writer->list_subj.type) { + reset_context(writer, false); + serd_node_free(&writer->context.subject); + writer->context.subject = writer->list_subj; + writer->list_subj = SERD_NODE_NULL; + } + return SERD_SUCCESS; + } + } else if (serd_node_equals(subject, &writer->context.subject)) { + if (serd_node_equals(predicate, &writer->context.predicate)) { + // Abbreviate S P + if (!(flags & SERD_ANON_O_BEGIN)) { + ++writer->indent; + } + write_sep(writer, SEP_END_O); + write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + if (!(flags & SERD_ANON_O_BEGIN)) { + --writer->indent; + } + } else { + // Abbreviate S + Sep sep = writer->context.predicate.type ? SEP_END_P : SEP_S_P; + write_sep(writer, sep); + write_pred(writer, flags, predicate); + write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + } + } else { + // No abbreviation + if (writer->context.subject.type) { + assert(writer->indent > 0); + --writer->indent; + if (serd_stack_is_empty(&writer->anon_stack)) { + write_sep(writer, SEP_END_S); + } + } else if (!writer->empty) { + write_sep(writer, SEP_S_P); + } + + if (!(flags & SERD_ANON_CONT)) { + write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags); + ++writer->indent; + write_sep(writer, SEP_S_P); + } else { + ++writer->indent; + } + + reset_context(writer, false); + copy_node(&writer->context.subject, subject); + + if (!(flags & SERD_LIST_S_BEGIN)) { + write_pred(writer, flags, predicate); + } + + write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + } + + if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) { + WriteContext* ctx = (WriteContext*)serd_stack_push( + &writer->anon_stack, sizeof(WriteContext)); + *ctx = writer->context; + WriteContext new_context = { + serd_node_copy(graph), serd_node_copy(subject), SERD_NODE_NULL }; + if ((flags & SERD_ANON_S_BEGIN)) { + new_context.predicate = serd_node_copy(predicate); + } + writer->context = new_context; + } else { + copy_node(&writer->context.graph, graph); + copy_node(&writer->context.subject, subject); + copy_node(&writer->context.predicate, predicate); + } + + return SERD_SUCCESS; +} + +SerdStatus +serd_writer_end_anon(SerdWriter* writer, + const SerdNode* node) +{ + if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) { + return SERD_SUCCESS; + } + if (serd_stack_is_empty(&writer->anon_stack) || writer->indent == 0) { + w_err(writer, SERD_ERR_UNKNOWN, + "unexpected end of anonymous node\n"); + return SERD_ERR_UNKNOWN; + } + --writer->indent; + write_sep(writer, SEP_ANON_END); + free_context(writer); + writer->context = *anon_stack_top(writer); + serd_stack_pop(&writer->anon_stack, sizeof(WriteContext)); + const bool is_subject = serd_node_equals(node, &writer->context.subject); + if (is_subject) { + copy_node(&writer->context.subject, node); + writer->context.predicate.type = SERD_NOTHING; + } + return SERD_SUCCESS; +} + +SerdStatus +serd_writer_finish(SerdWriter* writer) +{ + if (writer->context.subject.type) { + write_sep(writer, SEP_END_S); + } + if (writer->context.graph.type) { + write_sep(writer, SEP_GRAPH_END); + } + serd_byte_sink_flush(&writer->byte_sink); + writer->indent = 0; + return free_context(writer); +} + +SerdWriter* +serd_writer_new(SerdSyntax syntax, + SerdStyle style, + SerdEnv* env, + const SerdURI* base_uri, + SerdSink ssink, + void* stream) +{ + const WriteContext context = WRITE_CONTEXT_NULL; + SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter)); + writer->syntax = syntax; + writer->style = style; + writer->env = env; + writer->root_node = SERD_NODE_NULL; + writer->root_uri = SERD_URI_NULL; + writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL; + writer->anon_stack = serd_stack_new(4 * sizeof(WriteContext)); + writer->context = context; + writer->list_subj = SERD_NODE_NULL; + writer->empty = true; + writer->byte_sink = serd_byte_sink_new( + ssink, stream, (style & SERD_STYLE_BULK) ? SERD_PAGE_SIZE : 1); + return writer; +} + +void +serd_writer_set_error_sink(SerdWriter* writer, + SerdErrorSink error_sink, + void* error_handle) +{ + writer->error_sink = error_sink; + writer->error_handle = error_handle; +} + +void +serd_writer_chop_blank_prefix(SerdWriter* writer, + const uint8_t* prefix) +{ + free(writer->bprefix); + writer->bprefix_len = 0; + writer->bprefix = NULL; + if (prefix) { + writer->bprefix_len = strlen((const char*)prefix); + writer->bprefix = (uint8_t*)malloc(writer->bprefix_len + 1); + memcpy(writer->bprefix, prefix, writer->bprefix_len + 1); + } +} + +SerdStatus +serd_writer_set_base_uri(SerdWriter* writer, + const SerdNode* uri) +{ + if (!serd_env_set_base_uri(writer->env, uri)) { + serd_env_get_base_uri(writer->env, &writer->base_uri); + + if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { + if (writer->context.graph.type || writer->context.subject.type) { + sink(" .\n\n", 4, writer); + reset_context(writer, true); + } + sink("@base <", 7, writer); + sink(uri->buf, uri->n_bytes, writer); + sink("> .\n", 4, writer); + } + writer->indent = 0; + return reset_context(writer, true); + } + return SERD_ERR_UNKNOWN; +} + +SerdStatus +serd_writer_set_root_uri(SerdWriter* writer, + const SerdNode* uri) +{ + serd_node_free(&writer->root_node); + if (uri && uri->buf) { + writer->root_node = serd_node_copy(uri); + serd_uri_parse(uri->buf, &writer->root_uri); + } else { + writer->root_node = SERD_NODE_NULL; + writer->root_uri = SERD_URI_NULL; + } + return SERD_SUCCESS; +} + +SerdStatus +serd_writer_set_prefix(SerdWriter* writer, + const SerdNode* name, + const SerdNode* uri) +{ + if (!serd_env_set_prefix(writer->env, name, uri)) { + if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { + if (writer->context.graph.type || writer->context.subject.type) { + sink(" .\n\n", 4, writer); + reset_context(writer, true); + } + sink("@prefix ", 8, writer); + sink(name->buf, name->n_bytes, writer); + sink(": <", 3, writer); + write_uri(writer, uri->buf, uri->n_bytes); + sink("> .\n", 4, writer); + } + writer->indent = 0; + return reset_context(writer, true); + } + return SERD_ERR_UNKNOWN; +} + +void +serd_writer_free(SerdWriter* writer) +{ + serd_writer_finish(writer); + serd_stack_free(&writer->anon_stack); + free(writer->bprefix); + serd_byte_sink_free(&writer->byte_sink); + serd_node_free(&writer->root_node); + free(writer); +} + +SerdEnv* +serd_writer_get_env(SerdWriter* writer) +{ + return writer->env; +} + +size_t +serd_file_sink(const void* buf, size_t len, void* stream) +{ + return fwrite(buf, 1, len, (FILE*)stream); +} + +size_t +serd_chunk_sink(const void* buf, size_t len, void* stream) +{ + SerdChunk* chunk = (SerdChunk*)stream; + chunk->buf = (uint8_t*)realloc((uint8_t*)chunk->buf, chunk->len + len); + memcpy((uint8_t*)chunk->buf + chunk->len, buf, len); + chunk->len += len; + return len; +} + +uint8_t* +serd_chunk_sink_finish(SerdChunk* stream) +{ + serd_chunk_sink("", 1, stream); + return (uint8_t*)stream->buf; +} |