diff options
author | David Robillard <d@drobilla.net> | 2020-06-28 19:46:47 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2021-03-08 23:23:06 -0500 |
commit | f7b993d5aff1274e010d45304b12109f6de7e120 (patch) | |
tree | 6fadbff8f2e696ce62ba1237bee16c07776cd03e | |
parent | 2fb247d2beb71539ceba8f2841d7c1bad933ab36 (diff) | |
download | serd-f7b993d5aff1274e010d45304b12109f6de7e120.tar.gz serd-f7b993d5aff1274e010d45304b12109f6de7e120.tar.bz2 serd-f7b993d5aff1274e010d45304b12109f6de7e120.zip |
WIP: Make Writer always write to a ByteSink
-rw-r--r-- | doc/serdi.1 | 10 | ||||
-rw-r--r-- | include/serd/serd.h | 58 | ||||
-rw-r--r-- | src/.clang-tidy | 1 | ||||
-rw-r--r-- | src/byte_sink.c | 107 | ||||
-rw-r--r-- | src/byte_sink.h | 76 | ||||
-rw-r--r-- | src/node.c | 1 | ||||
-rw-r--r-- | src/serdi.c | 45 | ||||
-rw-r--r-- | src/writer.c | 29 | ||||
-rw-r--r-- | test/test_reader_writer.c | 25 | ||||
-rw-r--r-- | test/test_terse_write.c | 8 |
10 files changed, 202 insertions, 158 deletions
diff --git a/doc/serdi.1 b/doc/serdi.1 index 7cb2c923..dba9b34c 100644 --- a/doc/serdi.1 +++ b/doc/serdi.1 @@ -13,13 +13,13 @@ .Op Fl o Ar syntax .Op Fl p Ar prefix .Op Fl r Ar root -.Op Fl s Ar input +.Op Fl s Ar string +.Op Fl w Ar filename .Ar input -.Op Ar base_uri .Sh DESCRIPTION .Nm is a fast command-line utility for streaming and processing RDF data. -It reads an RDF document and writes the data to stdout, +It reads an RDF document and writes the data again, possibly transformed and/or in a different syntax. By default, the input syntax is guessed from the file extension, @@ -125,6 +125,10 @@ Write terser output without newlines. .Pp .It Fl v Display version information and exit. +.It Fl w Ar filename +Write output to the given +.Ar filename +instead of stdout. .El .Sh EXIT STATUS .Nm diff --git a/include/serd/serd.h b/include/serd/serd.h index af087ab5..61ae6099 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -419,36 +419,63 @@ typedef size_t (*SerdWriteFunc)(const void* SERD_NONNULL buf, void* SERD_NONNULL stream); /** - Create a new byte sink. + Create a new byte sink that writes to a buffer. - @param write_func Function called with bytes to consume. - @param stream Context parameter passed to `sink`. + The `buffer` is owned by the caller, but will be expanded as necessary. + + @param buffer Buffer to write output to. +*/ +SERD_API +SerdByteSink* SERD_ALLOCATED +serd_byte_sink_new_buffer(SerdBuffer* SERD_NONNULL buffer); + +/** + Create a new byte sink that writes to a file + + An arbitrary `FILE*` can be used via serd_byte_sink_new_function() as well, + this is just a convenience function that opens the file properly and sets + flags for optimized I/O if possible. + + @param path Path of file to open and write to. @param block_size Number of bytes to write per call. */ SERD_API SerdByteSink* SERD_ALLOCATED -serd_byte_sink_new(SerdWriteFunc SERD_NONNULL write_func, - void* SERD_NULLABLE stream, - size_t block_size); +serd_byte_sink_new_filename(const char* SERD_NONNULL path, size_t block_size); /** - Write to `sink`. + Create a new byte sink that writes to a user-specified function. + + The `stream` will be passed to the `write_func`, which is compatible with + the standard C `fwrite` if `stream` is a `FILE*`. - Compatible with SerdWriteFunc. + @param write_func Function called with bytes to consume. + @param stream Context parameter passed to `sink`. + @param block_size Number of bytes to write per call. */ SERD_API -size_t -serd_byte_sink_write(const void* SERD_NONNULL buf, - size_t size, - size_t nmemb, - SerdByteSink* SERD_NONNULL sink); +SerdByteSink* SERD_ALLOCATED +serd_byte_sink_new_function(SerdWriteFunc SERD_NONNULL write_func, + void* SERD_NULLABLE stream, + size_t block_size); /// Flush any pending output in `sink` to the underlying write function SERD_API void serd_byte_sink_flush(SerdByteSink* SERD_NONNULL sink); -/// Free `sink` +/** + Close `sink`, including the underlying file if necessary. + + If `sink` was created with serd_byte_sink_new_filename(), then the file is + closed. If there was an error, then SERD_ERR_UNKNOWN is returned and + `errno` is set. +*/ +SERD_API +SerdStatus +serd_byte_sink_close(SerdByteSink* SERD_NONNULL sink); + +/// Free `sink`, flushing and closing first if necessary SERD_API void serd_byte_sink_free(SerdByteSink* SERD_NULLABLE sink); @@ -1297,8 +1324,7 @@ serd_writer_new(SerdWorld* SERD_NONNULL world, SerdSyntax syntax, SerdWriterFlags flags, SerdEnv* SERD_NONNULL env, - SerdWriteFunc SERD_NONNULL write_func, - void* SERD_NULLABLE stream); + SerdByteSink* SERD_NONNULL byte_sink); /// Free `writer` SERD_API diff --git a/src/.clang-tidy b/src/.clang-tidy index 79f266d9..af60b7a5 100644 --- a/src/.clang-tidy +++ b/src/.clang-tidy @@ -2,6 +2,7 @@ Checks: > *, -*-magic-numbers, -*-uppercase-literal-suffix, + -android-cloexec-fopen, -bugprone-branch-clone, -bugprone-reserved-identifier, -bugprone-suspicious-string-compare, diff --git a/src/byte_sink.c b/src/byte_sink.c index a90f503d..7aaec065 100644 --- a/src/byte_sink.c +++ b/src/byte_sink.c @@ -14,32 +14,48 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include "macros.h" +#define _POSIX_C_SOURCE 200809L /* for posix_fadvise and fileno */ + +#include "byte_sink.h" + +#include "serd_config.h" #include "system.h" #include "serd/serd.h" -#include <assert.h> #include <stddef.h> +#include <stdio.h> #include <stdlib.h> -#include <string.h> -struct SerdByteSinkImpl { - SerdWriteFunc sink; - void* stream; - char* buf; - size_t size; - size_t block_size; -}; +#if defined(USE_POSIX_FADVISE) || defined(USE_FILENO) +# include <fcntl.h> +#endif SerdByteSink* -serd_byte_sink_new(SerdWriteFunc write_func, void* stream, size_t block_size) +serd_byte_sink_new_buffer(SerdBuffer* const buffer) { SerdByteSink* sink = (SerdByteSink*)calloc(1, sizeof(SerdByteSink)); - sink->sink = write_func; + sink->write_func = serd_buffer_sink; + sink->stream = buffer; + sink->block_size = 1; + sink->type = TO_BUFFER; + + return sink; +} + +static SerdByteSink* +serd_byte_sink_new_internal(const SerdWriteFunc write_func, + void* const stream, + const size_t block_size, + const SerdByteSinkType type) +{ + SerdByteSink* sink = (SerdByteSink*)calloc(1, sizeof(SerdByteSink)); + + sink->write_func = write_func; sink->stream = stream; sink->block_size = block_size; + sink->type = type; if (block_size > 1) { sink->buf = (char*)serd_allocate_buffer(block_size); @@ -48,58 +64,59 @@ serd_byte_sink_new(SerdWriteFunc write_func, void* stream, size_t block_size) return sink; } -size_t -serd_byte_sink_write(const void* buf, - size_t size, - size_t nmemb, - SerdByteSink* sink) +SerdByteSink* +serd_byte_sink_new_filename(const char* const path, const size_t block_size) { - assert(size == 1); - (void)size; - - if (nmemb == 0) { - return 0; + FILE* const file = fopen(path, "wb"); + if (!file) { + return NULL; } - if (sink->block_size == 1) { - return sink->sink(buf, 1, nmemb, sink->stream); - } +#if defined(USE_POSIX_FADVISE) && defined(USE_FILENO) + posix_fadvise(fileno(file), 0, 0, POSIX_FADV_SEQUENTIAL); +#endif - const size_t orig_len = nmemb; - while (nmemb) { - const size_t space = sink->block_size - sink->size; - const size_t n = MIN(space, nmemb); - - // Write as much as possible into the remaining buffer space - memcpy(sink->buf + sink->size, buf, n); - sink->size += n; - buf = (const char*)buf + n; - nmemb -= n; - - // Flush page if buffer is full - if (sink->size == sink->block_size) { - sink->sink(sink->buf, 1, sink->block_size, sink->stream); - sink->size = 0; - } - } + return serd_byte_sink_new_internal( + (SerdWriteFunc)fwrite, file, block_size, TO_FILENAME); +} - return orig_len; +SerdByteSink* +serd_byte_sink_new_function(const SerdWriteFunc write_func, + void* const stream, + const size_t block_size) +{ + return serd_byte_sink_new_internal( + write_func, stream, block_size, TO_FUNCTION); } void serd_byte_sink_flush(SerdByteSink* sink) { if (sink->block_size > 1 && sink->size > 0) { - sink->sink(sink->buf, 1, sink->size, sink->stream); + sink->write_func(sink->buf, 1, sink->size, sink->stream); sink->size = 0; } } +SerdStatus +serd_byte_sink_close(SerdByteSink* sink) +{ + serd_byte_sink_flush(sink); + + if (sink->type == TO_FILENAME && sink->stream) { + const int st = fclose((FILE*)sink->stream); + sink->stream = NULL; + return st ? SERD_ERR_UNKNOWN : SERD_SUCCESS; + } + + return SERD_SUCCESS; +} + void serd_byte_sink_free(SerdByteSink* sink) { if (sink) { - serd_byte_sink_flush(sink); + serd_byte_sink_close(sink); free(sink->buf); free(sink); } diff --git a/src/byte_sink.h b/src/byte_sink.h index 576f9c2e..abbe55ff 100644 --- a/src/byte_sink.h +++ b/src/byte_sink.h @@ -17,80 +17,56 @@ #ifndef SERD_BYTE_SINK_H #define SERD_BYTE_SINK_H -#include "serd_internal.h" -#include "system.h" - #include "serd/serd.h" #include <stddef.h> #include <string.h> -typedef struct SerdByteSinkImpl { - SerdWriteFunc sink; - void* stream; - char* buf; - size_t size; - size_t block_size; -} SerdByteSink; - -static inline SerdByteSink -serd_byte_sink_new(SerdWriteFunc sink, void* stream, size_t block_size) -{ - SerdByteSink bsink; - bsink.sink = sink; - bsink.stream = stream; - bsink.size = 0; - bsink.block_size = block_size; - bsink.buf = - ((block_size > 1) ? (char*)serd_allocate_buffer(block_size) : NULL); - return bsink; -} - -static inline void -serd_byte_sink_flush(SerdByteSink* bsink) -{ - if (bsink->block_size > 1 && bsink->size > 0) { - bsink->sink(bsink->buf, 1, bsink->size, bsink->stream); - bsink->size = 0; - } -} - -static inline void -serd_byte_sink_free(SerdByteSink* bsink) -{ - serd_byte_sink_flush(bsink); - serd_free_aligned(bsink->buf); - bsink->buf = NULL; -} +typedef enum { + TO_BUFFER, ///< Writing to a user-provided buffer + TO_FILENAME, ///< Writing to a file we opened + TO_FILE, ///< Writing to a user-provided file + TO_FUNCTION, ///< Writing to a user-provided function +} SerdByteSinkType; + +struct SerdByteSinkImpl { + SerdWriteFunc write_func; ///< User sink for TO_FUNCTION + void* stream; ///< Handle for TO_FILE* and TO_FUNCTION + char* buf; ///< Local buffer iff block_size > 1 + size_t size; ///< Bytes written so far in this chunk + size_t block_size; ///< Size of chunks to write + SerdByteSinkType type; ///< Type of output +}; static inline size_t -serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* bsink) +serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* const sink) { if (len == 0) { return 0; } - if (bsink->block_size == 1) { - return bsink->sink(buf, 1, len, bsink->stream); + if (sink->block_size == 1) { + return sink->write_func(buf, 1, len, sink->stream); } const size_t orig_len = len; while (len) { - const size_t space = bsink->block_size - bsink->size; - const size_t n = MIN(space, len); + const size_t space = sink->block_size - sink->size; + const size_t n = space < len ? space : len; // Write as much as possible into the remaining buffer space - memcpy(bsink->buf + bsink->size, buf, n); - bsink->size += n; + memcpy(sink->buf + sink->size, buf, n); + sink->size += n; buf = (const char*)buf + n; len -= n; // Flush page if buffer is full - if (bsink->size == bsink->block_size) { - bsink->sink(bsink->buf, 1, bsink->block_size, bsink->stream); - bsink->size = 0; + if (sink->size == sink->block_size) { + sink->write_func(sink->buf, 1, sink->block_size, sink->stream); + sink->size = 0; } } + return orig_len; } @@ -615,6 +615,7 @@ serd_new_real_file_uri(const char* const path, const char* const hostname) SerdNode* const node = serd_new_file_uri(SERD_MEASURE_STRING(real_path), SERD_MEASURE_STRING(hostname)); + free(real_path); return node; } diff --git a/src/serdi.c b/src/serdi.c index e546f463..8b72945a 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -110,6 +110,7 @@ main(int argc, char** argv) const char* add_prefix = NULL; const char* chop_prefix = NULL; const char* root_uri = NULL; + const char* out_filename = NULL; int a = 1; for (; a < argc && argv[a][0] == '-'; ++a) { if (argv[a][1] == '\0') { @@ -188,6 +189,11 @@ main(int argc, char** argv) } root_uri = argv[a]; + } else if (argv[a][1] == 'w') { + if (++a == argc) { + return missing_arg(argv[0], 'w'); + } + out_filename = argv[a]; } else { SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); return print_usage(argv[0], true); @@ -199,11 +205,6 @@ main(int argc, char** argv) return 1; } -#ifdef _WIN32 - _setmode(_fileno(stdin), _O_BINARY); - _setmode(_fileno(stdout), _O_BINARY); -#endif - const char* input = argv[a++]; if (!input_syntax && !(input_syntax = serd_guess_syntax(input))) { @@ -222,20 +223,29 @@ main(int argc, char** argv) base = serd_new_file_uri(SERD_MEASURE_STRING(input), SERD_EMPTY_STRING()); } - FILE* const out_fd = stdout; - SerdWorld* const world = serd_world_new(); - SerdEnv* const env = serd_env_new(serd_node_string_view(base)); + SerdWorld* const world = serd_world_new(); + SerdEnv* const env = serd_env_new(serd_node_string_view(base)); +#ifdef _WIN32 + _setmode(_fileno(stdin), _O_BINARY); + if (!out_filename) { + _setmode(_fileno(stdout), _O_BINARY); + } +#endif + + const size_t block_size = bulk_write ? 4096u : 1u; SerdByteSink* const byte_sink = - serd_byte_sink_new((SerdWriteFunc)fwrite, out_fd, bulk_write ? 4096u : 1u); + out_filename + ? serd_byte_sink_new_filename(out_filename, block_size) + : serd_byte_sink_new_function((SerdWriteFunc)fwrite, stdout, block_size); + + if (!byte_sink) { + perror("serdi: error opening output file"); + return 1; + } SerdWriter* const writer = - serd_writer_new(world, - output_syntax, - writer_flags, - env, - (SerdWriteFunc)serd_byte_sink_write, - byte_sink); + serd_writer_new(world, output_syntax, writer_flags, env, byte_sink); SerdReader* const reader = serd_reader_new( world, input_syntax, reader_flags, serd_writer_sink(writer), stack_size); @@ -275,15 +285,16 @@ main(int argc, char** argv) serd_reader_free(reader); serd_writer_free(writer); serd_node_free(input_name); - serd_byte_sink_free(byte_sink); serd_env_free(env); serd_node_free(base); serd_world_free(world); - if (fclose(stdout)) { + if (serd_byte_sink_close(byte_sink) || (!out_filename && fclose(stdout))) { perror("serdi: write error"); st = SERD_ERR_UNKNOWN; } + serd_byte_sink_free(byte_sink); + return (st > SERD_FAILURE) ? 1 : 0; } diff --git a/src/writer.c b/src/writer.c index 0f54c0d3..b1404a91 100644 --- a/src/writer.c +++ b/src/writer.c @@ -14,6 +14,7 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include "byte_sink.h" #include "env.h" #include "node.h" #include "sink.h" @@ -131,8 +132,7 @@ struct SerdWriterImpl { SerdURIView root_uri; WriteContext* anon_stack; size_t anon_stack_size; - SerdWriteFunc write_func; - void* stream; + SerdByteSink* byte_sink; SerdErrorFunc error_func; void* error_handle; WriteContext context; @@ -232,7 +232,7 @@ ctx(SerdWriter* writer, const SerdField field) SERD_WARN_UNUSED_RESULT static inline size_t sink(const void* buf, size_t len, SerdWriter* writer) { - const size_t written = writer->write_func(buf, 1, len, writer->stream); + const size_t written = serd_byte_sink_write(buf, len, writer->byte_sink); if (written != len) { if (errno) { serd_world_errorf(writer->world, @@ -1163,23 +1163,22 @@ serd_writer_new(SerdWorld* world, SerdSyntax syntax, SerdWriterFlags flags, SerdEnv* env, - SerdWriteFunc write_func, - void* stream) + SerdByteSink* byte_sink) { const WriteContext context = WRITE_CONTEXT_NULL; SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter)); - writer->world = world; - writer->syntax = syntax; - writer->flags = flags; - writer->env = env; - writer->root_node = NULL; - writer->root_uri = SERD_URI_NULL; + + writer->world = world; + writer->syntax = syntax; + writer->flags = flags; + writer->env = env; + writer->root_node = NULL; + writer->root_uri = SERD_URI_NULL; writer->anon_stack = (WriteContext*)calloc(anon_stack_capacity, sizeof(WriteContext)); - writer->write_func = write_func; - writer->stream = stream; - writer->context = context; - writer->empty = true; + writer->byte_sink = byte_sink; + writer->context = context; + writer->empty = true; writer->iface.handle = writer; writer->iface.on_event = (SerdEventFunc)serd_writer_on_event; diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c index a3e99fa4..a4f57a0f 100644 --- a/test/test_reader_writer.c +++ b/test/test_reader_writer.c @@ -152,8 +152,11 @@ test_strict_write(void) const char* path = "serd_strict_write_test.ttl"; FILE* fd = fopen(path, "wb"); SerdEnv* env = serd_env_new(SERD_EMPTY_STRING()); - SerdWriter* writer = - serd_writer_new(world, SERD_TURTLE, 0, env, (SerdWriteFunc)fwrite, fd); + + SerdByteSink* byte_sink = + serd_byte_sink_new_function((SerdWriteFunc)fwrite, fd, 1); + + SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, byte_sink); assert(fd); assert(writer); @@ -175,6 +178,7 @@ test_strict_write(void) serd_node_free(p); serd_writer_free(writer); + serd_byte_sink_free(byte_sink); serd_env_free(env); fclose(fd); serd_world_free(world); @@ -218,8 +222,12 @@ test_writer(const char* const path) SerdWorld* world = serd_world_new(); - SerdWriter* writer = serd_writer_new( - world, SERD_TURTLE, SERD_WRITE_LAX, env, (SerdWriteFunc)fwrite, fd); + SerdByteSink* byte_sink = + serd_byte_sink_new_function((SerdWriteFunc)fwrite, fd, 1); + + SerdWriter* writer = + serd_writer_new(world, SERD_TURTLE, SERD_WRITE_LAX, env, byte_sink); + assert(writer); serd_writer_chop_blank_prefix(writer, "tmp"); @@ -280,6 +288,7 @@ test_writer(const char* const path) serd_node_free(hello); serd_writer_free(writer); + serd_byte_sink_free(byte_sink); serd_node_free(lit); serd_node_free(o); @@ -287,12 +296,10 @@ test_writer(const char* const path) serd_node_free(l); // Test buffer sink - SerdBuffer buffer = {NULL, 0}; - SerdByteSink* byte_sink = - serd_byte_sink_new((SerdWriteFunc)serd_buffer_sink, &buffer, 1); + SerdBuffer buffer = {NULL, 0}; - writer = serd_writer_new( - world, SERD_TURTLE, 0, env, (SerdWriteFunc)serd_byte_sink_write, byte_sink); + byte_sink = serd_byte_sink_new_buffer(&buffer); + writer = serd_writer_new(world, SERD_TURTLE, 0, env, byte_sink); SerdNode* const base = serd_new_uri(SERD_STATIC_STRING("http://example.org/base")); diff --git a/test/test_terse_write.c b/test/test_terse_write.c index d6f24357..06d2e0ce 100644 --- a/test/test_terse_write.c +++ b/test/test_terse_write.c @@ -73,10 +73,11 @@ test(void) serd_env_set_prefix( env, SERD_STATIC_STRING("rdf"), SERD_STATIC_STRING(NS_RDF)); - SerdWriter* writer = serd_writer_new( - world, SERD_TURTLE, 0, env, (SerdWriteFunc)serd_buffer_sink, &buffer); + SerdByteSink* const byte_sink = serd_byte_sink_new_buffer(&buffer); + SerdWriter* const writer = + serd_writer_new(world, SERD_TURTLE, 0, env, byte_sink); - const SerdSink* sink = serd_writer_sink(writer); + const SerdSink* const sink = serd_writer_sink(writer); // Simple lone list serd_sink_write(sink, SERD_TERSE_S | SERD_LIST_S, l1, rdf_first, s1, NULL); @@ -108,6 +109,7 @@ test(void) serd_buffer_sink_finish(&buffer); serd_writer_free(writer); + serd_byte_sink_free(byte_sink); serd_nodes_free(nodes); serd_env_free(env); serd_world_free(world); |