From 22ac239266b01f067ece83eb6addcdc9f825780e Mon Sep 17 00:00:00 2001 From: David Robillard Date: Mon, 12 Dec 2011 05:10:49 +0000 Subject: Make bulk writer internal and inlinable to avoid function call overhead in the writer. git-svn-id: http://svn.drobilla.net/serd/trunk@254 490d8e77-9747-427b-9fa3-0b8f29cee8a0 --- serd/serd.h | 55 +++------------------ src/serd_internal.h | 73 +++++++++++++++++++++++++++ src/serdi.c | 10 +--- src/sink.c | 83 ------------------------------- src/writer.c | 139 +++++++++++++++++++++++++++++----------------------- wscript | 1 - 6 files changed, 162 insertions(+), 199 deletions(-) delete mode 100644 src/sink.c diff --git a/serd/serd.h b/serd/serd.h index 94499a4d..b20dacf2 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -239,7 +239,8 @@ typedef enum { SERD_STYLE_ABBREVIATED = 1, /**< Abbreviate triples when possible. */ SERD_STYLE_ASCII = 1 << 1, /**< Escape all non-ASCII characters. */ SERD_STYLE_RESOLVED = 1 << 2, /**< Resolve URIs against base URI. */ - SERD_STYLE_CURIED = 1 << 3 /**< Shorten URIs into CURIEs. */ + SERD_STYLE_CURIED = 1 << 3, /**< Shorten URIs into CURIEs. */ + SERD_STYLE_BULK = 1 << 4, /**< Write output in pages. */ } SerdStyle; /** @@ -265,57 +266,12 @@ SERD_API size_t serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags); -/** - @} - @name Sink - @{ -*/ - -/** - Sink function for raw string output. -*/ -typedef size_t (*SerdSink)(const void* buf, size_t len, void* stream); - -/** - Sink adapter that writes blocks to the target sink. - - This is itself a SerdSink which can be used with any SerdSink as a target to - transparently write chunked I/O to the output sink. This can significantly - improve write performance when the target is a file or similar resource. -*/ -typedef struct SerdBulkSinkImpl SerdBulkSink; - -/** - Create a new bulk sink adapter. - @param sink Target sink where completed blocks will be written. - @param stream Stream parameter for target sink. - @param block_size Size of blocks to write, and internal buffer size. -*/ -SERD_API -SerdBulkSink* -serd_bulk_sink_new(SerdSink sink, void* stream, size_t block_size); - -/** - Free a bulk sink adapter. -*/ -SERD_API -void -serd_bulk_sink_free(SerdBulkSink* bsink); - -/** - Write data to a bulk sink adapter. - - This function may safely be cast to SerdSink. -*/ -SERD_API -size_t -serd_bulk_sink_write(const void* buf, size_t len, SerdBulkSink* bsink); - /** @} @name URI @{ */ + static const SerdURI SERD_URI_NULL = {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}; /** @@ -346,6 +302,11 @@ SERD_API void serd_uri_resolve(const SerdURI* uri, const SerdURI* base, SerdURI* out); +/** + Sink function for raw string output. +*/ +typedef size_t (*SerdSink)(const void* buf, size_t len, void* stream); + /** Serialise @c uri with a series of calls to @c sink. */ diff --git a/src/serd_internal.h b/src/serd_internal.h index bf1f630f..5fb9f0ef 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -34,6 +34,12 @@ #define SERD_PAGE_SIZE 4096 +#ifndef MIN +# define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif + +/* File and Buffer Utilities */ + static inline FILE* serd_fopen(const char* path, const char* mode) { @@ -60,6 +66,8 @@ serd_bufalloc(size_t size) #endif } +/* Stack */ + /** A dynamic stack in memory. */ typedef struct { uint8_t* buf; ///< Stack memory @@ -115,6 +123,71 @@ serd_stack_pop(SerdStack* stack, size_t n_bytes) stack->size -= n_bytes; } +/* Bulk Sink */ + +typedef struct SerdBulkSinkImpl { + SerdSink sink; + void* stream; + uint8_t* buf; + size_t size; + size_t block_size; +} SerdBulkSink; + +static inline SerdBulkSink +serd_bulk_sink_new(SerdSink sink, void* stream, size_t block_size) +{ + SerdBulkSink bsink; + bsink.sink = sink; + bsink.stream = stream; + bsink.size = 0; + bsink.block_size = block_size; + bsink.buf = serd_bufalloc(block_size); + return bsink; +} + +static inline void +serd_bulk_sink_flush(SerdBulkSink* bsink) +{ + if (bsink->size > 0) { + bsink->sink(bsink->buf, bsink->size, bsink->stream); + } + bsink->size = 0; +} + +static inline void +serd_bulk_sink_free(SerdBulkSink* bsink) +{ + if (bsink) { + serd_bulk_sink_flush(bsink); + free(bsink->buf); + } +} + +static inline size_t +serd_bulk_sink_write(const void* buf, size_t len, SerdBulkSink* bsink) +{ + const size_t orig_len = len; + while (len) { + const size_t space = bsink->block_size - bsink->size; + const size_t n = MIN(space, len); + + // Write as much as possible into the remaining buffer space + memcpy(bsink->buf + bsink->size, buf, n); + bsink->size += n; + buf = (uint8_t*)buf + n; + len -= n; + + // Flush page if buffer is full + if (bsink->size == bsink->block_size) { + bsink->sink(bsink->buf, bsink->block_size, bsink->stream); + bsink->size = 0; + } + } + return orig_len; +} + +/* Character utilities */ + /** Return true if @a c lies within [min...max] (inclusive) */ static inline bool in_range(const uint8_t c, const uint8_t min, const uint8_t max) diff --git a/src/serdi.c b/src/serdi.c index c5183008..989821fb 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -197,17 +197,12 @@ main(int argc, char** argv) output_style |= SERD_STYLE_RESOLVED; } - SerdSink sink = file_sink; - void* stream = out_fd; - SerdBulkSink* bulk_sink = NULL; if (bulk_write) { - bulk_sink = serd_bulk_sink_new(sink, stream, SERD_PAGE_SIZE); - sink = (SerdSink)serd_bulk_sink_write; - stream = bulk_sink; + output_style |= SERD_STYLE_BULK; } SerdWriter* writer = serd_writer_new( - output_syntax, output_style, env, &base_uri, sink, stream); + output_syntax, output_style, env, &base_uri, file_sink, out_fd); if (chop_prefix) { serd_writer_chop_blank_prefix(writer, chop_prefix); @@ -238,7 +233,6 @@ main(int argc, char** argv) serd_writer_finish(state.writer); serd_writer_free(state.writer); - serd_bulk_sink_free(bulk_sink); serd_env_free(state.env); serd_node_free(&base_uri_node); diff --git a/src/sink.c b/src/sink.c deleted file mode 100644 index 3fa90e8c..00000000 --- a/src/sink.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - Copyright 2011 David Robillard - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#include "serd_internal.h" - -#include -#include - -#ifndef MIN -# define MIN(a, b) (((a) < (b)) ? (a) : (b)) -#endif - -struct SerdBulkSinkImpl { - SerdSink sink; - void* stream; - uint8_t* buf; - size_t size; - size_t block_size; -}; - -SERD_API -SerdBulkSink* -serd_bulk_sink_new(SerdSink sink, void* stream, size_t block_size) -{ - SerdBulkSink* bsink = (SerdBulkSink*)malloc(sizeof(SerdBulkSink)); - bsink->sink = sink; - bsink->stream = stream; - bsink->size = 0; - bsink->block_size = block_size; - bsink->buf = serd_bufalloc(block_size); - return bsink; -} - -SERD_API -void -serd_bulk_sink_free(SerdBulkSink* bsink) -{ - if (bsink) { - // Flush any remaining output - if (bsink->size > 0) { - bsink->sink(bsink->buf, bsink->size, bsink->stream); - } - free(bsink->buf); - free(bsink); - } -} - -SERD_API -size_t -serd_bulk_sink_write(const void* buf, size_t len, SerdBulkSink* bsink) -{ - const size_t orig_len = len; - while (len > 0) { - const size_t space = bsink->block_size - bsink->size; - const size_t n = MIN(space, len); - - // Write as much as possible into the remaining buffer space - memcpy(bsink->buf + bsink->size, buf, n); - bsink->size += n; - buf = (uint8_t*)buf + n; - len -= n; - - // Flush page if buffer is full - if (bsink->size == bsink->block_size) { - bsink->sink(bsink->buf, bsink->block_size, bsink->stream); - bsink->size = 0; - } - } - return orig_len; -} diff --git a/src/writer.c b/src/writer.c index d7520bfd..d8fe039f 100644 --- a/src/writer.c +++ b/src/writer.c @@ -40,6 +40,7 @@ struct SerdWriterImpl { SerdEnv* env; SerdURI base_uri; SerdStack anon_stack; + SerdBulkSink bulk_sink; SerdSink sink; void* stream; WriteContext context; @@ -80,6 +81,16 @@ copy_node(SerdNode* dst, const SerdNode* src) memcpy((char*)dst->buf, src->buf, src->n_bytes + 1); } +static inline size_t +sink(const void* buf, size_t len, SerdWriter* writer) +{ + if (writer->style & SERD_STYLE_BULK) { + return serd_bulk_sink_write(buf, len, &writer->bulk_sink); + } else { + return writer->sink(buf, len, writer->stream); + } +} + static bool write_text(SerdWriter* writer, TextContext ctx, const uint8_t* utf8, size_t n_bytes, uint8_t terminator) @@ -97,7 +108,7 @@ write_text(SerdWriter* writer, TextContext ctx, } if (j > i) { - writer->sink(&utf8[i], j - i, writer->stream); + sink(&utf8[i], j - i, writer); i = j; continue; } @@ -105,17 +116,17 @@ write_text(SerdWriter* writer, TextContext ctx, uint8_t in = utf8[i++]; if (ctx == WRITE_LONG_STRING) { if (in == '\\') { - writer->sink("\\\\", 2, writer->stream); continue; + sink("\\\\", 2, writer); continue; } } else { switch (in) { - case '\\': writer->sink("\\\\", 2, writer->stream); continue; - case '\n': writer->sink("\\n", 2, writer->stream); continue; - case '\r': writer->sink("\\r", 2, writer->stream); continue; - case '\t': writer->sink("\\t", 2, writer->stream); continue; + case '\\': sink("\\\\", 2, writer); continue; + case '\n': sink("\\n", 2, writer); continue; + case '\r': sink("\\r", 2, writer); continue; + case '\t': sink("\\t", 2, writer); continue; case '"': if (terminator == '"') { - writer->sink("\\\"", 2, writer->stream); + sink("\\\"", 2, writer); continue; } // else fall-through default: break; @@ -123,7 +134,7 @@ write_text(SerdWriter* writer, TextContext ctx, if (in == terminator) { snprintf(escape, 7, "\\u%04X", terminator); - writer->sink(escape, 6, writer->stream); + sink(escape, 6, writer); continue; } } @@ -134,7 +145,7 @@ write_text(SerdWriter* writer, TextContext ctx, size = 1; c = in & 0x7F; if (in_range(in, 0x20, 0x7E)) { // Printable ASCII - writer->sink(&in, 1, writer->stream); + sink(&in, 1, writer); continue; } } else if ((in & 0xE0) == 0xC0) { // Starts with `110' @@ -155,7 +166,7 @@ write_text(SerdWriter* writer, TextContext ctx, && !(writer->style & SERD_STYLE_ASCII)) { // Write UTF-8 character directly to UTF-8 output // TODO: Scan to next escape and write entire range at once - writer->sink(utf8 + i - 1, size, writer->stream); + sink(utf8 + i - 1, size, writer); i += size - 1; continue; } @@ -175,10 +186,10 @@ write_text(SerdWriter* writer, TextContext ctx, if (c < 0xFFFF) { snprintf(escape, 7, "\\u%04X", c); - writer->sink(escape, 6, writer->stream); + sink(escape, 6, writer); } else { snprintf(escape, 11, "\\U%08X", c); - writer->sink(escape, 10, writer->stream); + sink(escape, 10, writer); } } return true; @@ -191,13 +202,13 @@ serd_writer_write_delim(SerdWriter* writer, const uint8_t delim) case '\n': break; default: - writer->sink(" ", 1, writer->stream); + sink(" ", 1, writer); case '[': - writer->sink(&delim, 1, writer->stream); + sink(&delim, 1, writer); } - writer->sink("\n", 1, writer->stream); + sink("\n", 1, writer); for (unsigned i = 0; i < writer->indent; ++i) { - writer->sink("\t", 1, writer->stream); + sink("\t", 1, writer); } } @@ -246,17 +257,17 @@ write_node(SerdWriter* writer, } else if (writer->syntax != SERD_NTRIPLES && ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S)) || (field == FIELD_OBJECT && (flags & SERD_EMPTY_O)))) { - writer->sink("[]", 2, writer->stream); + sink("[]", 2, writer); } else { - writer->sink("_:", 2, writer->stream); + sink("_:", 2, writer); if (writer->bprefix && !strncmp((const char*)node->buf, (const char*)writer->bprefix, writer->bprefix_len)) { - writer->sink(node->buf + writer->bprefix_len, - node->n_bytes - writer->bprefix_len, - writer->stream); + sink(node->buf + writer->bprefix_len, + node->n_bytes - writer->bprefix_len, + writer); } else { - writer->sink(node->buf, node->n_bytes, writer->stream); + sink(node->buf, node->n_bytes, writer); } } break; @@ -267,13 +278,13 @@ write_node(SerdWriter* writer, fprintf(stderr, "Undefined namespace prefix `%s'\n", node->buf); return false; } - writer->sink("<", 1, writer->stream); + sink("<", 1, writer); write_text(writer, WRITE_URI, uri_prefix.buf, uri_prefix.len, '>'); write_text(writer, WRITE_URI, uri_suffix.buf, uri_suffix.len, '>'); - writer->sink(">", 1, writer->stream); + sink(">", 1, writer); break; case SERD_TURTLE: - writer->sink(node->buf, node->n_bytes, writer->stream); + sink(node->buf, node->n_bytes, writer); } break; case SERD_LITERAL: @@ -282,34 +293,34 @@ write_node(SerdWriter* writer, if (!strcmp((const char*)datatype->buf, NS_XSD "boolean") || !strcmp((const char*)datatype->buf, NS_XSD "decimal") || !strcmp((const char*)datatype->buf, NS_XSD "integer")) { - writer->sink(node->buf, node->n_bytes, writer->stream); + sink(node->buf, node->n_bytes, writer); break; } } if (writer->syntax != SERD_NTRIPLES && ((node->flags & SERD_HAS_NEWLINE) || (node->flags & SERD_HAS_QUOTE))) { - writer->sink("\"\"\"", 3, writer->stream); + sink("\"\"\"", 3, writer); write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes, '\0'); - writer->sink("\"\"\"", 3, writer->stream); + sink("\"\"\"", 3, writer); } else { - writer->sink("\"", 1, writer->stream); + sink("\"", 1, writer); write_text(writer, WRITE_STRING, node->buf, node->n_bytes, '"'); - writer->sink("\"", 1, writer->stream); + sink("\"", 1, writer); } if (lang && lang->buf) { - writer->sink("@", 1, writer->stream); - writer->sink(lang->buf, lang->n_bytes, writer->stream); + sink("@", 1, writer); + sink(lang->buf, lang->n_bytes, writer); } else if (datatype && datatype->buf) { - writer->sink("^^", 2, writer->stream); + sink("^^", 2, writer); write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags); } break; case SERD_URI: if ((writer->syntax == SERD_TURTLE) && !strcmp((const char*)node->buf, NS_RDF "type")) { - writer->sink("a", 1, writer->stream); + sink("a", 1, writer); return true; } else if ((writer->style & SERD_STYLE_CURIED) && serd_uri_string_has_scheme(node->buf)) { @@ -317,7 +328,7 @@ write_node(SerdWriter* writer, SerdChunk suffix; if (serd_env_qualify(writer->env, node, &prefix, &suffix)) { write_text(writer, WRITE_URI, prefix.buf, prefix.n_bytes, '>'); - writer->sink(":", 1, writer->stream); + sink(":", 1, writer); write_text(writer, WRITE_URI, suffix.buf, suffix.len, '>'); return true; } @@ -327,15 +338,15 @@ write_node(SerdWriter* writer, if (!serd_uri_parse(node->buf, &uri)) { SerdURI abs_uri; serd_uri_resolve(&uri, &writer->base_uri, &abs_uri); - writer->sink("<", 1, writer->stream); - serd_uri_serialise(&abs_uri, writer->sink, writer->stream); - writer->sink(">", 1, writer->stream); + sink("<", 1, writer); + serd_uri_serialise(&abs_uri, (SerdSink)sink, writer); + sink(">", 1, writer); return true; } } - writer->sink("<", 1, writer->stream); + sink("<", 1, writer); write_text(writer, WRITE_URI, node->buf, node->n_bytes, '>'); - writer->sink(">", 1, writer->stream); + sink(">", 1, writer); return true; } return true; @@ -356,14 +367,14 @@ serd_writer_write_statement(SerdWriter* writer, switch (writer->syntax) { case SERD_NTRIPLES: write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags); - writer->sink(" ", 1, writer->stream); + sink(" ", 1, writer); write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags); - writer->sink(" ", 1, writer->stream); + sink(" ", 1, writer); if (!write_node(writer, object, object_datatype, object_lang, FIELD_OBJECT, flags)) { return SERD_ERR_UNKNOWN; } - writer->sink(" .\n", 3, writer->stream); + sink(" .\n", 3, writer); return SERD_SUCCESS; case SERD_TURTLE: break; @@ -372,7 +383,7 @@ serd_writer_write_statement(SerdWriter* writer, if (serd_node_equals(predicate, &writer->context.predicate)) { // Abbreviate S P if ((flags & SERD_ANON_O_BEGIN)) { - writer->sink(" , ", 3, writer->stream); // ] , [ + sink(" , ", 3, writer); // ] , [ } else { ++writer->indent; serd_writer_write_delim(writer, ','); @@ -391,7 +402,7 @@ serd_writer_write_statement(SerdWriter* writer, } write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags); copy_node(&writer->context.predicate, predicate); - writer->sink(" ", 1, writer->stream); + sink(" ", 1, writer); write_node(writer, object, object_datatype, object_lang, FIELD_OBJECT, flags); } @@ -423,7 +434,7 @@ serd_writer_write_statement(SerdWriter* writer, write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags); copy_node(&writer->context.predicate, predicate); - writer->sink(" ", 1, writer->stream); + sink(" ", 1, writer); write_node(writer, object, object_datatype, object_lang, FIELD_OBJECT, flags); @@ -462,7 +473,7 @@ serd_writer_end_anon(SerdWriter* writer, assert(writer->indent > 0); --writer->indent; serd_writer_write_delim(writer, '\n'); - writer->sink("]", 1, writer->stream); + sink("]", 1, writer); reset_context(writer, true); writer->context = *anon_stack_top(writer); serd_stack_pop(&writer->anon_stack, sizeof(WriteContext)); @@ -479,7 +490,10 @@ SerdStatus serd_writer_finish(SerdWriter* writer) { if (writer->context.subject.type) { - writer->sink(" .\n", 3, writer->stream); + sink(" .\n", 3, writer); + } + if (writer->style & SERD_STYLE_BULK) { + serd_bulk_sink_flush(&writer->bulk_sink); } reset_context(writer, true); return SERD_SUCCESS; @@ -508,6 +522,9 @@ serd_writer_new(SerdSyntax syntax, writer->bprefix_len = 0; writer->indent = 0; writer->empty = true; + if (style & SERD_STYLE_BULK) { + writer->bulk_sink = serd_bulk_sink_new(sink, stream, SERD_PAGE_SIZE); + } return writer; } @@ -538,12 +555,12 @@ serd_writer_set_base_uri(SerdWriter* writer, if (writer->syntax != SERD_NTRIPLES) { if (writer->context.graph.type || writer->context.subject.type) { - writer->sink(" .\n\n", 4, writer->stream); + sink(" .\n\n", 4, writer); reset_context(writer, false); } - writer->sink("@base <", 7, writer->stream); - writer->sink(uri->buf, uri->n_bytes, writer->stream); - writer->sink("> .\n", 4, writer->stream); + sink("@base <", 7, writer); + sink(uri->buf, uri->n_bytes, writer); + sink("> .\n", 4, writer); } reset_context(writer, false); return SERD_SUCCESS; @@ -560,14 +577,14 @@ serd_writer_set_prefix(SerdWriter* writer, if (!serd_env_set_prefix(writer->env, name, uri)) { if (writer->syntax != SERD_NTRIPLES) { if (writer->context.graph.type || writer->context.subject.type) { - writer->sink(" .\n\n", 4, writer->stream); + sink(" .\n\n", 4, writer); reset_context(writer, false); } - writer->sink("@prefix ", 8, writer->stream); - writer->sink(name->buf, name->n_bytes, writer->stream); - writer->sink(": <", 3, writer->stream); + sink("@prefix ", 8, writer); + sink(name->buf, name->n_bytes, writer); + sink(": <", 3, writer); write_text(writer, WRITE_URI, uri->buf, uri->n_bytes, '>'); - writer->sink("> .\n", 4, writer->stream); + sink("> .\n", 4, writer); } reset_context(writer, false); return SERD_SUCCESS; @@ -579,9 +596,11 @@ SERD_API void serd_writer_free(SerdWriter* writer) { - SerdWriter* const me = (SerdWriter*)writer; - serd_writer_finish(me); + serd_writer_finish(writer); serd_stack_free(&writer->anon_stack); free(writer->bprefix); - free(me); + if (writer->style & SERD_STYLE_BULK) { + serd_bulk_sink_free(&writer->bulk_sink); + } + free(writer); } diff --git a/wscript b/wscript index ebf69e7a..2d71f4c6 100644 --- a/wscript +++ b/wscript @@ -105,7 +105,6 @@ def build(bld): src/env.c src/node.c src/reader.c - src/sink.c src/string.c src/uri.c src/writer.c -- cgit v1.2.1