diff options
-rw-r--r-- | doc/serdi.1 | 4 | ||||
-rw-r--r-- | serd/serd.h | 56 | ||||
-rw-r--r-- | src/byte_sink.c | 111 | ||||
-rw-r--r-- | src/byte_sink.h | 79 | ||||
-rw-r--r-- | src/serdi.c | 38 | ||||
-rw-r--r-- | src/writer.c | 32 | ||||
-rw-r--r-- | tests/model_test.c | 21 | ||||
-rw-r--r-- | tests/serd_test.c | 48 | ||||
-rw-r--r-- | tests/terse_write_test.c | 10 | ||||
-rw-r--r-- | wscript | 16 |
10 files changed, 232 insertions, 183 deletions
diff --git a/doc/serdi.1 b/doc/serdi.1 index 23b5cd55..3e67046a 100644 --- a/doc/serdi.1 +++ b/doc/serdi.1 @@ -109,6 +109,10 @@ Write terser output without newlines. Display version information and exit. .TP +.BR \-w " " \fIFILENAME\fR +Write output to the given \fIFILENAME\fR instead of stdout. + +.TP .BR \-x Support parsing variable nodes. Variables can be written in SPARQL style, for example \*(lq?var\*(rq or \*(lq$var\*(rq. diff --git a/serd/serd.h b/serd/serd.h index 78a1d0cf..fe2a08b2 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -443,34 +443,63 @@ typedef size_t (*SerdWriteFunc)(const void* buf, void* stream); /** - Create a new byte sink + Create a new byte sink that writes to a buffer - @param write_func Function called with bytes to consume. - @param stream Context parameter passed to `sink`. + The `buffer` is owned by the caller, but will be expanded as necessary. + + @param buffer Buffer to write output to. +*/ +SERD_API +SerdByteSink* +serd_byte_sink_new_buffer(SerdBuffer* buffer); + +/** + Create a new byte sink that writes to a file + + An arbitrary `FILE*` can be used via serd_byte_sink_new_function() as well, + this is just a convenience function that opens the file properly and sets + flags for optimized I/O if possible. + + @param path Path of file to open and write to. @param block_size Number of bytes to write per call. */ SERD_API SerdByteSink* -serd_byte_sink_new(SerdWriteFunc write_func, void* stream, size_t block_size); +serd_byte_sink_new_filename(const char* path, size_t block_size); /** - Write to `sink` + Create a new byte sink that writes to a user-specified function + + The `stream` will be passed to the `write_func`, which is compatible with + the standard C `fwrite` if `stream` is a `FILE*`. - Compatible with SerdWriteFunc. + @param write_func Function called with bytes to consume. + @param stream Context parameter passed to `sink`. + @param block_size Number of bytes to write per call. */ SERD_API -size_t -serd_byte_sink_write(const void* buf, - size_t size, - size_t nmemb, - SerdByteSink* sink); +SerdByteSink* +serd_byte_sink_new_function(SerdWriteFunc write_func, + void* stream, + size_t block_size); /// Flush any pending output in `sink` to the underlying write function SERD_API void serd_byte_sink_flush(SerdByteSink* sink); -/// Free `sink` +/** + Close `sink`, including the underlying file if necessary. + + If `sink` was created with serd_byte_sink_new_filename(), then the file is + closed. If there was an error, then SERD_ERR_UNKNOWN is returned and + `errno` is set. +*/ +SERD_API +SerdStatus +serd_byte_sink_close(SerdByteSink* sink); + +/// Free `sink`, flushing and closing first if necessary SERD_API void serd_byte_sink_free(SerdByteSink* sink); @@ -1368,8 +1397,7 @@ serd_writer_new(SerdWorld* world, SerdSyntax syntax, SerdWriterFlags flags, SerdEnv* env, - SerdWriteFunc write_func, - void* stream); + SerdByteSink* byte_sink); /// Free `writer` SERD_API diff --git a/src/byte_sink.c b/src/byte_sink.c index a4eb8098..03b46f03 100644 --- a/src/byte_sink.c +++ b/src/byte_sink.c @@ -14,32 +14,48 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include "int_math.h" +#define _POSIX_C_SOURCE 200809L /* for posix_fadvise and fileno */ + +#include "byte_sink.h" + +#include "serd_config.h" #include "system.h" #include "serd/serd.h" -#include <assert.h> #include <stddef.h> +#include <stdio.h> #include <stdlib.h> -#include <string.h> -struct SerdByteSinkImpl { - SerdWriteFunc sink; - void* stream; - char* buf; - size_t size; - size_t block_size; -}; +#if defined(HAVE_POSIX_FADVISE) || defined(HAVE_FILENO) +# include <fcntl.h> +#endif SerdByteSink* -serd_byte_sink_new(SerdWriteFunc write_func, void* stream, size_t block_size) +serd_byte_sink_new_buffer(SerdBuffer* const buffer) { SerdByteSink* sink = (SerdByteSink*)calloc(1, sizeof(SerdByteSink)); - sink->sink = write_func; + sink->write_func = serd_buffer_sink; + sink->stream = buffer; + sink->block_size = 1; + sink->type = TO_BUFFER; + + return sink; +} + +static SerdByteSink* +serd_byte_sink_new_internal(const SerdWriteFunc write_func, + void* const stream, + const size_t block_size, + const SerdByteSinkType type) +{ + SerdByteSink* sink = (SerdByteSink*)calloc(1, sizeof(SerdByteSink)); + + sink->write_func = write_func; sink->stream = stream; sink->block_size = block_size; + sink->type = type; if (block_size > 1) { sink->buf = (char*)serd_allocate_buffer(block_size); @@ -48,56 +64,63 @@ serd_byte_sink_new(SerdWriteFunc write_func, void* stream, size_t block_size) return sink; } -size_t -serd_byte_sink_write(const void* buf, - size_t size, - size_t nmemb, - SerdByteSink* sink) +SerdByteSink* +serd_byte_sink_new_filename(const char* const path, const size_t block_size) { - assert(size == 1); - (void)size; - - if (nmemb == 0) { - return 0; - } else if (sink->block_size == 1) { - return sink->sink(buf, 1, nmemb, sink->stream); + FILE* const file = fopen(path, "wb"); + if (!file) { + return NULL; } - const size_t orig_len = nmemb; - while (nmemb) { - const size_t space = sink->block_size - sink->size; - const size_t n = MIN(space, nmemb); - - // Write as much as possible into the remaining buffer space - memcpy(sink->buf + sink->size, buf, n); - sink->size += n; - buf = (const char*)buf + n; - nmemb -= n; - - // Flush page if buffer is full - if (sink->size == sink->block_size) { - sink->sink(sink->buf, 1, sink->block_size, sink->stream); - sink->size = 0; - } - } +#if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO) + posix_fadvise(fileno(file), 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + + return serd_byte_sink_new_internal((SerdWriteFunc)fwrite, + file, + block_size, + TO_FILENAME); +} - return orig_len; +SerdByteSink* +serd_byte_sink_new_function(const SerdWriteFunc write_func, + void* const stream, + const size_t block_size) +{ + return serd_byte_sink_new_internal(write_func, + stream, + block_size, + TO_FUNCTION); } void serd_byte_sink_flush(SerdByteSink* sink) { if (sink->block_size > 1 && sink->size > 0) { - sink->sink(sink->buf, 1, sink->size, sink->stream); + sink->write_func(sink->buf, 1, sink->size, sink->stream); sink->size = 0; } } +SerdStatus +serd_byte_sink_close(SerdByteSink* sink) +{ + serd_byte_sink_flush(sink); + + if (sink->type == TO_FILENAME && sink->stream) { + const int st = fclose((FILE*)sink->stream); + sink->stream = NULL; + return st ? SERD_ERR_UNKNOWN : SERD_SUCCESS; + } + + return SERD_SUCCESS; +} + void serd_byte_sink_free(SerdByteSink* sink) { if (sink) { - serd_byte_sink_flush(sink); + serd_byte_sink_close(sink); free(sink->buf); free(sink); } diff --git a/src/byte_sink.h b/src/byte_sink.h index 1be84b48..b90cf4a5 100644 --- a/src/byte_sink.h +++ b/src/byte_sink.h @@ -17,79 +17,54 @@ #ifndef SERD_BYTE_SINK_H #define SERD_BYTE_SINK_H -#include "serd_internal.h" -#include "system.h" - #include "serd/serd.h" #include <stddef.h> #include <string.h> -typedef struct SerdByteSinkImpl { - SerdWriteFunc sink; - void* stream; - char* buf; - size_t size; - size_t block_size; -} SerdByteSink; +typedef enum { + TO_BUFFER, ///< Writing to a user-provided buffer + TO_FILENAME, ///< Writing to a file we opened + TO_FILE, ///< Writing to a user-provided file + TO_FUNCTION, ///< Writing to a user-provided function +} SerdByteSinkType; -static inline SerdByteSink -serd_byte_sink_new(SerdWriteFunc sink, void* stream, size_t block_size) -{ - SerdByteSink bsink; - bsink.sink = sink; - bsink.stream = stream; - bsink.size = 0; - bsink.block_size = block_size; - bsink.buf = ((block_size > 1) - ? (char*)serd_bufalloc(block_size) - : NULL); - return bsink; -} - -static inline void -serd_byte_sink_flush(SerdByteSink* bsink) -{ - if (bsink->block_size > 1 && bsink->size > 0) { - bsink->sink(bsink->buf, 1, bsink->size, bsink->stream); - bsink->size = 0; - } -} - -static inline void -serd_byte_sink_free(SerdByteSink* bsink) -{ - serd_byte_sink_flush(bsink); - free(bsink->buf); - bsink->buf = NULL; -} +struct SerdByteSinkImpl { + SerdWriteFunc write_func; ///< User sink for TO_FUNCTION + void* stream; ///< Handle for TO_FILE* and TO_FUNCTION + char* buf; ///< Local buffer iff block_size > 1 + size_t size; ///< Bytes written so far in this chunk + size_t block_size; ///< Size of chunks to write + SerdByteSinkType type; ///< Type of output +}; static inline size_t -serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* bsink) +serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* const sink) { if (len == 0) { return 0; - } else if (bsink->block_size == 1) { - return bsink->sink(buf, 1, len, bsink->stream); + } else if (sink->block_size == 1) { + return sink->write_func(buf, 1, len, sink->stream); } const size_t orig_len = len; while (len) { - const size_t space = bsink->block_size - bsink->size; - const size_t n = MIN(space, len); + const size_t space = sink->block_size - sink->size; + const size_t n = space < len ? space : len; // Write as much as possible into the remaining buffer space - memcpy(bsink->buf + bsink->size, buf, n); - bsink->size += n; - buf = (const char*)buf + n; - len -= n; + memcpy(sink->buf + sink->size, buf, n); + sink->size += n; + buf = (const char*)buf + n; + len -= n; // Flush page if buffer is full - if (bsink->size == bsink->block_size) { - bsink->sink(bsink->buf, 1, bsink->block_size, bsink->stream); - bsink->size = 0; + if (sink->size == sink->block_size) { + sink->write_func(sink->buf, 1, sink->block_size, sink->stream); + sink->size = 0; } } + return orig_len; } diff --git a/src/serdi.c b/src/serdi.c index 1e9ceb88..0fc30239 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -236,6 +236,7 @@ main(int argc, char** argv) const char* add_prefix = ""; const char* chop_prefix = NULL; const char* root_uri = NULL; + const char* out_filename = NULL; int a = 1; for (; a < argc && argv[a][0] == '-'; ++a) { if (argv[a][1] == '\0') { @@ -323,6 +324,11 @@ main(int argc, char** argv) return missing_arg(argv[0], 'r'); } root_uri = argv[a]; + } else if (argv[a][1] == 'w') { + if (++a == argc) { + return missing_arg(argv[0], 'w'); + } + out_filename = argv[a]; } else if (argv[a][1] == 'x') { reader_flags |= SERD_READ_VARIABLES; } else { @@ -336,11 +342,6 @@ main(int argc, char** argv) return 1; } -#ifdef _WIN32 - _setmode(_fileno(stdin), _O_BINARY); - _setmode(_fileno(stdout), _O_BINARY); -#endif - char** inputs = argv + a; int n_inputs = argc - a; @@ -366,21 +367,35 @@ main(int argc, char** argv) } } - FILE* out_fd = stdout; SerdWorld* world = serd_world_new(); SerdEnv* env = serd_env_new(base); +#ifdef _WIN32 + _setmode(_fileno(stdin), _O_BINARY); + if (!out_filename) { + _setmode(_fileno(stdout), _O_BINARY); + } +#endif + const SerdSerialisationFlags serialisation_flags = no_inline ? SERD_NO_INLINE_OBJECTS : 0u; - SerdByteSink* byte_sink = serd_byte_sink_new( - (SerdWriteFunc)fwrite, out_fd, bulk_write ? 4096u : 1u); + const size_t block_size = bulk_write ? 4096u : 1u; + SerdByteSink* byte_sink = + out_filename ? serd_byte_sink_new_filename(out_filename, block_size) + : serd_byte_sink_new_function((SerdWriteFunc)fwrite, + stdout, + block_size); + + if (!byte_sink) { + perror("serdi: error opening output file"); + return 1; + } SerdWriter* writer = serd_writer_new(world, output_syntax, writer_flags, env, - (SerdWriteFunc)serd_byte_sink_write, byte_sink); SerdModel* model = NULL; @@ -490,15 +505,16 @@ main(int argc, char** argv) serd_sink_free(inserter); serd_model_free(model); serd_writer_free(writer); - serd_byte_sink_free(byte_sink); serd_env_free(env); serd_node_free(base); serd_world_free(world); - if (fclose(stdout)) { + if (serd_byte_sink_close(byte_sink) || (!out_filename && fclose(stdout))) { perror("serdi: write error"); st = SERD_ERR_UNKNOWN; } + serd_byte_sink_free(byte_sink); + return (st > SERD_FAILURE) ? 1 : 0; } diff --git a/src/writer.c b/src/writer.c index 697d0e7c..ed3be4e7 100644 --- a/src/writer.c +++ b/src/writer.c @@ -14,6 +14,7 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include "byte_sink.h" #include "env.h" #include "node.h" #include "sink.h" @@ -131,8 +132,7 @@ struct SerdWriterImpl { SerdURI root_uri; WriteContext* anon_stack; size_t anon_stack_size; - SerdWriteFunc write_func; - void* stream; + SerdByteSink* byte_sink; SerdLogFunc log_func; void* log_handle; WriteContext context; @@ -236,7 +236,7 @@ ctx(SerdWriter* writer, const SerdField field) SERD_WARN_UNUSED_RESULT static inline size_t sink(const void* buf, size_t len, SerdWriter* writer) { - const size_t written = writer->write_func(buf, 1, len, writer->stream); + const size_t written = serd_byte_sink_write(buf, len, writer->byte_sink); if (written != len) { if (errno) { SERD_LOG_ERRORF(writer->world, SERD_ERR_BAD_WRITE, @@ -1077,22 +1077,22 @@ serd_writer_new(SerdWorld* world, SerdSyntax syntax, SerdWriterFlags flags, SerdEnv* env, - SerdWriteFunc write_func, - void* stream) + SerdByteSink* byte_sink) { const WriteContext context = WRITE_CONTEXT_NULL; SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter)); - writer->world = world; - writer->syntax = syntax; - writer->flags = flags; - writer->env = env; - writer->root_node = NULL; - writer->root_uri = SERD_URI_NULL; - writer->anon_stack = (WriteContext*)calloc(anon_stack_capacity, sizeof(WriteContext)); - writer->write_func = write_func; - writer->stream = stream; - writer->context = context; - writer->empty = true; + + writer->world = world; + writer->syntax = syntax; + writer->flags = flags; + writer->env = env; + writer->root_node = NULL; + writer->root_uri = SERD_URI_NULL; + writer->anon_stack = + (WriteContext*)calloc(anon_stack_capacity, sizeof(WriteContext)); + writer->byte_sink = byte_sink; + writer->context = context; + writer->empty = true; writer->iface.handle = writer; writer->iface.on_event = (SerdEventFunc)serd_writer_on_event; diff --git a/tests/model_test.c b/tests/model_test.c index 694fc119..bf717df0 100644 --- a/tests/model_test.c +++ b/tests/model_test.c @@ -723,10 +723,11 @@ test_write_bad_list(SerdWorld* world, const unsigned n_quads) serd_model_add(model, list2, prest, norest, NULL); serd_model_add(model, norest, pfirst, val2, NULL); - SerdBuffer buffer = {NULL, 0}; - SerdEnv* env = serd_env_new(NULL); - SerdWriter* writer = serd_writer_new( - world, SERD_TURTLE, 0, env, serd_buffer_sink, &buffer); + SerdBuffer buffer = {NULL, 0}; + SerdEnv* env = serd_env_new(NULL); + SerdByteSink* byte_sink = serd_byte_sink_new_buffer(&buffer); + + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, byte_sink); SerdRange* all = serd_model_all(model); serd_range_serialise(all, serd_writer_get_sink(writer), 0); @@ -746,6 +747,7 @@ test_write_bad_list(SerdWorld* world, const unsigned n_quads) free(buffer.buf); serd_writer_free(writer); + serd_byte_sink_free(byte_sink); serd_model_free(model); serd_env_free(env); serd_nodes_free(nodes); @@ -798,9 +800,13 @@ test_write_error_in_list(SerdWorld* world, const unsigned n_quads) SerdEnv* env = serd_env_new(NULL); for (size_t max_successes = 0; max_successes < 21; ++max_successes) { - FailingWriteFuncState state = {0, max_successes}; - SerdWriter* writer = serd_writer_new( - world, SERD_TURTLE, 0, env, failing_write_func, &state); + FailingWriteFuncState state = {0, max_successes}; + + SerdByteSink* byte_sink = + serd_byte_sink_new_function(failing_write_func, &state, 1); + + SerdWriter* writer = + serd_writer_new(world, SERD_TURTLE, 0, env, byte_sink); const SerdSink* const sink = serd_writer_get_sink(writer); SerdRange* const all = serd_model_all(model); @@ -810,6 +816,7 @@ test_write_error_in_list(SerdWorld* world, const unsigned n_quads) assert(st == SERD_ERR_BAD_WRITE); serd_writer_free(writer); + serd_byte_sink_free(byte_sink); } serd_env_free(env); diff --git a/tests/serd_test.c b/tests/serd_test.c index 82126c0c..a0fbb234 100644 --- a/tests/serd_test.c +++ b/tests/serd_test.c @@ -175,16 +175,16 @@ test_get_blank(void) static int test_strict_write(void) { - SerdWorld* world = serd_world_new(); - const char* path = "serd_strict_write_test.ttl"; - FILE* fd = fopen(path, "wb"); - SerdEnv* env = serd_env_new(NULL); - SerdWriter* writer = serd_writer_new(world, - SERD_TURTLE, - 0, - env, - (SerdWriteFunc)fwrite, - fd); + SerdWorld* world = serd_world_new(); + const char* path = "serd_strict_write_test.ttl"; + FILE* fd = fopen(path, "wb"); + SerdEnv* env = serd_env_new(NULL); + + SerdByteSink* byte_sink = + serd_byte_sink_new_function((SerdWriteFunc)fwrite, fd, 1); + + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, byte_sink); + assert(fd); assert(writer); @@ -202,6 +202,7 @@ test_strict_write(void) serd_node_free(p); serd_writer_free(writer); + serd_byte_sink_free(byte_sink); serd_env_free(env); fclose(fd); serd_world_free(world); @@ -559,12 +560,12 @@ test_writer(const char* const path) SerdWorld* world = serd_world_new(); - SerdWriter* writer = serd_writer_new(world, - SERD_TURTLE, - SERD_WRITE_LAX, - env, - (SerdWriteFunc)fwrite, - fd); + SerdByteSink* byte_sink = + serd_byte_sink_new_function((SerdWriteFunc)fwrite, fd, 1); + + SerdWriter* writer = + serd_writer_new(world, SERD_TURTLE, SERD_WRITE_LAX, env, byte_sink); + assert(writer); serd_writer_chop_blank_prefix(writer, "tmp"); @@ -633,6 +634,7 @@ test_writer(const char* const path) assert(!serd_sink_write(iface, 0, s, p, o, 0)); serd_writer_free(writer); + serd_byte_sink_free(byte_sink); serd_node_free(lit); serd_node_free(o); serd_node_free(t); @@ -640,16 +642,10 @@ test_writer(const char* const path) serd_node_free(urn_Type); // Test buffer sink - SerdBuffer buffer = { NULL, 0 }; - SerdByteSink* byte_sink = - serd_byte_sink_new((SerdWriteFunc)serd_buffer_sink, &buffer, 1); - - writer = serd_writer_new(world, - SERD_TURTLE, - 0, - env, - (SerdWriteFunc)serd_byte_sink_write, - byte_sink); + SerdBuffer buffer = {NULL, 0}; + + byte_sink = serd_byte_sink_new_buffer(&buffer); + writer = serd_writer_new(world, SERD_TURTLE, 0, env, byte_sink); o = serd_new_uri("http://example.org/base"); assert(!serd_writer_set_base_uri(writer, o)); diff --git a/tests/terse_write_test.c b/tests/terse_write_test.c index 4b6a80c4..924dfbc9 100644 --- a/tests/terse_write_test.c +++ b/tests/terse_write_test.c @@ -67,13 +67,8 @@ test(void) serd_env_set_prefix_from_strings(env, "rdf", NS_RDF); - SerdWriter* writer = serd_writer_new(world, - SERD_TURTLE, - 0, - env, - (SerdWriteFunc)serd_buffer_sink, - &buffer); - + SerdByteSink* byte_sink = serd_byte_sink_new_buffer(&buffer); + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, byte_sink); const SerdSink* sink = serd_writer_get_sink(writer); // Simple lone list @@ -102,6 +97,7 @@ test(void) serd_buffer_sink_finish(&buffer); serd_writer_free(writer); + serd_byte_sink_free(byte_sink); serd_nodes_free(nodes); serd_env_free(env); serd_world_free(world); @@ -625,8 +625,7 @@ def test_suite(ctx, if expected_return == 0: # Run model test for positive test (must succeed) out_path = action + '.model.out' - check([command[0]] + ['-m'] + command[1:], - stdout=out_path, + check([command[0]] + ['-w', out_path, '-m'] + command[1:], name=action + ' model') if result and ((mf + 'result') in model[test]): @@ -723,8 +722,9 @@ def test(tst): out_path = in_path + '.io' check_path = '%s/tests/good/%s' % (srcdir, check_name) - check([serdi, '-o', lang, '-I', in_path, '%s/%s' % (srcdir, in_path)], - stdout=out_path, name=in_name) + check([serdi, '-o', lang, '-I', in_path, '-w', out_path, + '%s/%s' % (srcdir, in_path)], + name=in_name) check.file_equals(check_path, out_path) @@ -751,8 +751,8 @@ def test(tst): with tst.group('MultiFile') as check: path = '%s/tests/multifile' % srcdir - check([serdi, '%s/input1.ttl' % path, '%s/input2.trig' % path], - stdout='tests/multifile/output.out.nq') + check([serdi, '-w', 'tests/multifile/output.out.nq', + '%s/input1.ttl' % path, '%s/input2.trig' % path]) check.file_equals('%s/tests/multifile/output.nq' % srcdir, 'tests/multifile/output.out.nq') @@ -802,6 +802,10 @@ def test(tst): stdout='/dev/full', name='Short write error') check([serdi, 'file://%s/tests/good/manifest.ttl' % srcdir], stdout='/dev/full', name='Long write error') + if os.path.exists('/proc/cpuinfo'): + check([serdi, '-w', '/proc/cpuinfo', + 'file://%s/tests/good/base.ttl' % srcdir], + name='Read-only write error') if sys.version_info.major >= 3: from waflib.extras import autoship |