aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2020-06-28 19:46:47 +0200
committerDavid Robillard <d@drobilla.net>2021-03-08 23:23:06 -0500
commitf7b993d5aff1274e010d45304b12109f6de7e120 (patch)
tree6fadbff8f2e696ce62ba1237bee16c07776cd03e
parent2fb247d2beb71539ceba8f2841d7c1bad933ab36 (diff)
downloadserd-f7b993d5aff1274e010d45304b12109f6de7e120.tar.gz
serd-f7b993d5aff1274e010d45304b12109f6de7e120.tar.bz2
serd-f7b993d5aff1274e010d45304b12109f6de7e120.zip
WIP: Make Writer always write to a ByteSink
-rw-r--r--doc/serdi.110
-rw-r--r--include/serd/serd.h58
-rw-r--r--src/.clang-tidy1
-rw-r--r--src/byte_sink.c107
-rw-r--r--src/byte_sink.h76
-rw-r--r--src/node.c1
-rw-r--r--src/serdi.c45
-rw-r--r--src/writer.c29
-rw-r--r--test/test_reader_writer.c25
-rw-r--r--test/test_terse_write.c8
10 files changed, 202 insertions, 158 deletions
diff --git a/doc/serdi.1 b/doc/serdi.1
index 7cb2c923..dba9b34c 100644
--- a/doc/serdi.1
+++ b/doc/serdi.1
@@ -13,13 +13,13 @@
.Op Fl o Ar syntax
.Op Fl p Ar prefix
.Op Fl r Ar root
-.Op Fl s Ar input
+.Op Fl s Ar string
+.Op Fl w Ar filename
.Ar input
-.Op Ar base_uri
.Sh DESCRIPTION
.Nm
is a fast command-line utility for streaming and processing RDF data.
-It reads an RDF document and writes the data to stdout,
+It reads an RDF document and writes the data again,
possibly transformed and/or in a different syntax.
By default,
the input syntax is guessed from the file extension,
@@ -125,6 +125,10 @@ Write terser output without newlines.
.Pp
.It Fl v
Display version information and exit.
+.It Fl w Ar filename
+Write output to the given
+.Ar filename
+instead of stdout.
.El
.Sh EXIT STATUS
.Nm
diff --git a/include/serd/serd.h b/include/serd/serd.h
index af087ab5..61ae6099 100644
--- a/include/serd/serd.h
+++ b/include/serd/serd.h
@@ -419,36 +419,63 @@ typedef size_t (*SerdWriteFunc)(const void* SERD_NONNULL buf,
void* SERD_NONNULL stream);
/**
- Create a new byte sink.
+ Create a new byte sink that writes to a buffer.
- @param write_func Function called with bytes to consume.
- @param stream Context parameter passed to `sink`.
+ The `buffer` is owned by the caller, but will be expanded as necessary.
+
+ @param buffer Buffer to write output to.
+*/
+SERD_API
+SerdByteSink* SERD_ALLOCATED
+serd_byte_sink_new_buffer(SerdBuffer* SERD_NONNULL buffer);
+
+/**
+ Create a new byte sink that writes to a file.
+
+ An arbitrary `FILE*` can be used via serd_byte_sink_new_function() as well;
+ this is just a convenience function that opens the file properly and sets
+ flags for optimized I/O if possible.
+
+ @param path Path of file to open and write to.
@param block_size Number of bytes to write per call.
*/
SERD_API
SerdByteSink* SERD_ALLOCATED
-serd_byte_sink_new(SerdWriteFunc SERD_NONNULL write_func,
- void* SERD_NULLABLE stream,
- size_t block_size);
+serd_byte_sink_new_filename(const char* SERD_NONNULL path, size_t block_size);
/**
- Write to `sink`.
+ Create a new byte sink that writes to a user-specified function.
+
+ The `stream` will be passed to the `write_func`, which is compatible with
+ the standard C `fwrite` if `stream` is a `FILE*`.
- Compatible with SerdWriteFunc.
+ @param write_func Function called with bytes to consume.
+ @param stream Context parameter passed to `write_func`.
+ @param block_size Number of bytes to write per call.
*/
SERD_API
-size_t
-serd_byte_sink_write(const void* SERD_NONNULL buf,
- size_t size,
- size_t nmemb,
- SerdByteSink* SERD_NONNULL sink);
+SerdByteSink* SERD_ALLOCATED
+serd_byte_sink_new_function(SerdWriteFunc SERD_NONNULL write_func,
+ void* SERD_NULLABLE stream,
+ size_t block_size);
/// Flush any pending output in `sink` to the underlying write function
SERD_API
void
serd_byte_sink_flush(SerdByteSink* SERD_NONNULL sink);
-/// Free `sink`
+/**
+ Flush and close `sink`, including the underlying file if necessary.
+
+ If `sink` was created with serd_byte_sink_new_filename(), then the file is
+ closed. If there was an error, then SERD_ERR_UNKNOWN is returned and
+ `errno` is set.
+*/
+SERD_API
+SerdStatus
+serd_byte_sink_close(SerdByteSink* SERD_NONNULL sink);
+
+/// Free `sink`, flushing and closing first if necessary
SERD_API
void
serd_byte_sink_free(SerdByteSink* SERD_NULLABLE sink);
@@ -1297,8 +1324,7 @@ serd_writer_new(SerdWorld* SERD_NONNULL world,
SerdSyntax syntax,
SerdWriterFlags flags,
SerdEnv* SERD_NONNULL env,
- SerdWriteFunc SERD_NONNULL write_func,
- void* SERD_NULLABLE stream);
+ SerdByteSink* SERD_NONNULL byte_sink);
/// Free `writer`
SERD_API
diff --git a/src/.clang-tidy b/src/.clang-tidy
index 79f266d9..af60b7a5 100644
--- a/src/.clang-tidy
+++ b/src/.clang-tidy
@@ -2,6 +2,7 @@ Checks: >
*,
-*-magic-numbers,
-*-uppercase-literal-suffix,
+ -android-cloexec-fopen,
-bugprone-branch-clone,
-bugprone-reserved-identifier,
-bugprone-suspicious-string-compare,
diff --git a/src/byte_sink.c b/src/byte_sink.c
index a90f503d..7aaec065 100644
--- a/src/byte_sink.c
+++ b/src/byte_sink.c
@@ -14,32 +14,48 @@
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
-#include "macros.h"
+#define _POSIX_C_SOURCE 200809L /* for posix_fadvise and fileno */
+
+#include "byte_sink.h"
+
+#include "serd_config.h"
#include "system.h"
#include "serd/serd.h"
-#include <assert.h>
#include <stddef.h>
+#include <stdio.h>
#include <stdlib.h>
-#include <string.h>
-struct SerdByteSinkImpl {
- SerdWriteFunc sink;
- void* stream;
- char* buf;
- size_t size;
- size_t block_size;
-};
+#if defined(USE_POSIX_FADVISE) || defined(USE_FILENO)
+# include <fcntl.h>
+#endif
SerdByteSink*
-serd_byte_sink_new(SerdWriteFunc write_func, void* stream, size_t block_size)
+serd_byte_sink_new_buffer(SerdBuffer* const buffer)
{
SerdByteSink* sink = (SerdByteSink*)calloc(1, sizeof(SerdByteSink));
- sink->sink = write_func;
+ sink->write_func = serd_buffer_sink;
+ sink->stream = buffer;
+ sink->block_size = 1;
+ sink->type = TO_BUFFER;
+
+ return sink;
+}
+
+static SerdByteSink*
+serd_byte_sink_new_internal(const SerdWriteFunc write_func,
+ void* const stream,
+ const size_t block_size,
+ const SerdByteSinkType type)
+{
+ SerdByteSink* sink = (SerdByteSink*)calloc(1, sizeof(SerdByteSink));
+
+ sink->write_func = write_func;
sink->stream = stream;
sink->block_size = block_size;
+ sink->type = type;
if (block_size > 1) {
sink->buf = (char*)serd_allocate_buffer(block_size);
@@ -48,58 +64,59 @@ serd_byte_sink_new(SerdWriteFunc write_func, void* stream, size_t block_size)
return sink;
}
-size_t
-serd_byte_sink_write(const void* buf,
- size_t size,
- size_t nmemb,
- SerdByteSink* sink)
+SerdByteSink*
+serd_byte_sink_new_filename(const char* const path, const size_t block_size)
{
- assert(size == 1);
- (void)size;
-
- if (nmemb == 0) {
- return 0;
+ FILE* const file = fopen(path, "wb");
+ if (!file) {
+ return NULL;
}
- if (sink->block_size == 1) {
- return sink->sink(buf, 1, nmemb, sink->stream);
- }
+#if defined(USE_POSIX_FADVISE) && defined(USE_FILENO)
+ posix_fadvise(fileno(file), 0, 0, POSIX_FADV_SEQUENTIAL);
+#endif
- const size_t orig_len = nmemb;
- while (nmemb) {
- const size_t space = sink->block_size - sink->size;
- const size_t n = MIN(space, nmemb);
-
- // Write as much as possible into the remaining buffer space
- memcpy(sink->buf + sink->size, buf, n);
- sink->size += n;
- buf = (const char*)buf + n;
- nmemb -= n;
-
- // Flush page if buffer is full
- if (sink->size == sink->block_size) {
- sink->sink(sink->buf, 1, sink->block_size, sink->stream);
- sink->size = 0;
- }
- }
+ return serd_byte_sink_new_internal(
+ (SerdWriteFunc)fwrite, file, block_size, TO_FILENAME);
+}
- return orig_len;
+SerdByteSink*
+serd_byte_sink_new_function(const SerdWriteFunc write_func,
+ void* const stream,
+ const size_t block_size)
+{
+ return serd_byte_sink_new_internal(
+ write_func, stream, block_size, TO_FUNCTION);
}
void
serd_byte_sink_flush(SerdByteSink* sink)
{
if (sink->block_size > 1 && sink->size > 0) {
- sink->sink(sink->buf, 1, sink->size, sink->stream);
+ sink->write_func(sink->buf, 1, sink->size, sink->stream);
sink->size = 0;
}
}
+SerdStatus
+serd_byte_sink_close(SerdByteSink* sink)
+{
+ serd_byte_sink_flush(sink);
+
+ if (sink->type == TO_FILENAME && sink->stream) {
+ const int st = fclose((FILE*)sink->stream);
+ sink->stream = NULL;
+ return st ? SERD_ERR_UNKNOWN : SERD_SUCCESS;
+ }
+
+ return SERD_SUCCESS;
+}
+
void
serd_byte_sink_free(SerdByteSink* sink)
{
if (sink) {
- serd_byte_sink_flush(sink);
+ serd_byte_sink_close(sink);
free(sink->buf);
free(sink);
}
diff --git a/src/byte_sink.h b/src/byte_sink.h
index 576f9c2e..abbe55ff 100644
--- a/src/byte_sink.h
+++ b/src/byte_sink.h
@@ -17,80 +17,56 @@
#ifndef SERD_BYTE_SINK_H
#define SERD_BYTE_SINK_H
-#include "serd_internal.h"
-#include "system.h"
-
#include "serd/serd.h"
#include <stddef.h>
#include <string.h>
-typedef struct SerdByteSinkImpl {
- SerdWriteFunc sink;
- void* stream;
- char* buf;
- size_t size;
- size_t block_size;
-} SerdByteSink;
-
-static inline SerdByteSink
-serd_byte_sink_new(SerdWriteFunc sink, void* stream, size_t block_size)
-{
- SerdByteSink bsink;
- bsink.sink = sink;
- bsink.stream = stream;
- bsink.size = 0;
- bsink.block_size = block_size;
- bsink.buf =
- ((block_size > 1) ? (char*)serd_allocate_buffer(block_size) : NULL);
- return bsink;
-}
-
-static inline void
-serd_byte_sink_flush(SerdByteSink* bsink)
-{
- if (bsink->block_size > 1 && bsink->size > 0) {
- bsink->sink(bsink->buf, 1, bsink->size, bsink->stream);
- bsink->size = 0;
- }
-}
-
-static inline void
-serd_byte_sink_free(SerdByteSink* bsink)
-{
- serd_byte_sink_flush(bsink);
- serd_free_aligned(bsink->buf);
- bsink->buf = NULL;
-}
+typedef enum {
+ TO_BUFFER, ///< Writing to a user-provided buffer
+ TO_FILENAME, ///< Writing to a file we opened
+ TO_FILE, ///< Writing to a user-provided file
+ TO_FUNCTION, ///< Writing to a user-provided function
+} SerdByteSinkType;
+
+struct SerdByteSinkImpl {
+ SerdWriteFunc write_func; ///< User sink for TO_FUNCTION
+ void* stream; ///< Handle for TO_FILE* and TO_FUNCTION
+ char* buf; ///< Local buffer iff block_size > 1
+ size_t size; ///< Bytes written so far in this chunk
+ size_t block_size; ///< Size of chunks to write
+ SerdByteSinkType type; ///< Type of output
+};
static inline size_t
-serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* bsink)
+serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* const sink)
{
if (len == 0) {
return 0;
}
- if (bsink->block_size == 1) {
- return bsink->sink(buf, 1, len, bsink->stream);
+ if (sink->block_size == 1) {
+ return sink->write_func(buf, 1, len, sink->stream);
}
const size_t orig_len = len;
while (len) {
- const size_t space = bsink->block_size - bsink->size;
- const size_t n = MIN(space, len);
+ const size_t space = sink->block_size - sink->size;
+ const size_t n = space < len ? space : len;
// Write as much as possible into the remaining buffer space
- memcpy(bsink->buf + bsink->size, buf, n);
- bsink->size += n;
+ memcpy(sink->buf + sink->size, buf, n);
+ sink->size += n;
buf = (const char*)buf + n;
len -= n;
// Flush page if buffer is full
- if (bsink->size == bsink->block_size) {
- bsink->sink(bsink->buf, 1, bsink->block_size, bsink->stream);
- bsink->size = 0;
+ if (sink->size == sink->block_size) {
+ sink->write_func(sink->buf, 1, sink->block_size, sink->stream);
+ sink->size = 0;
}
}
+
return orig_len;
}
diff --git a/src/node.c b/src/node.c
index 0448c312..14f6490b 100644
--- a/src/node.c
+++ b/src/node.c
@@ -615,6 +615,7 @@ serd_new_real_file_uri(const char* const path, const char* const hostname)
SerdNode* const node = serd_new_file_uri(SERD_MEASURE_STRING(real_path),
SERD_MEASURE_STRING(hostname));
+
free(real_path);
return node;
}
diff --git a/src/serdi.c b/src/serdi.c
index e546f463..8b72945a 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -110,6 +110,7 @@ main(int argc, char** argv)
const char* add_prefix = NULL;
const char* chop_prefix = NULL;
const char* root_uri = NULL;
+ const char* out_filename = NULL;
int a = 1;
for (; a < argc && argv[a][0] == '-'; ++a) {
if (argv[a][1] == '\0') {
@@ -188,6 +189,11 @@ main(int argc, char** argv)
}
root_uri = argv[a];
+ } else if (argv[a][1] == 'w') {
+ if (++a == argc) {
+ return missing_arg(argv[0], 'w');
+ }
+ out_filename = argv[a];
} else {
SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1);
return print_usage(argv[0], true);
@@ -199,11 +205,6 @@ main(int argc, char** argv)
return 1;
}
-#ifdef _WIN32
- _setmode(_fileno(stdin), _O_BINARY);
- _setmode(_fileno(stdout), _O_BINARY);
-#endif
-
const char* input = argv[a++];
if (!input_syntax && !(input_syntax = serd_guess_syntax(input))) {
@@ -222,20 +223,29 @@ main(int argc, char** argv)
base = serd_new_file_uri(SERD_MEASURE_STRING(input), SERD_EMPTY_STRING());
}
- FILE* const out_fd = stdout;
- SerdWorld* const world = serd_world_new();
- SerdEnv* const env = serd_env_new(serd_node_string_view(base));
+ SerdWorld* const world = serd_world_new();
+ SerdEnv* const env = serd_env_new(serd_node_string_view(base));
+#ifdef _WIN32
+ _setmode(_fileno(stdin), _O_BINARY);
+ if (!out_filename) {
+ _setmode(_fileno(stdout), _O_BINARY);
+ }
+#endif
+
+ const size_t block_size = bulk_write ? 4096u : 1u;
SerdByteSink* const byte_sink =
- serd_byte_sink_new((SerdWriteFunc)fwrite, out_fd, bulk_write ? 4096u : 1u);
+ out_filename
+ ? serd_byte_sink_new_filename(out_filename, block_size)
+ : serd_byte_sink_new_function((SerdWriteFunc)fwrite, stdout, block_size);
+
+ if (!byte_sink) {
+ perror("serdi: error opening output file");
+ return 1;
+ }
SerdWriter* const writer =
- serd_writer_new(world,
- output_syntax,
- writer_flags,
- env,
- (SerdWriteFunc)serd_byte_sink_write,
- byte_sink);
+ serd_writer_new(world, output_syntax, writer_flags, env, byte_sink);
SerdReader* const reader = serd_reader_new(
world, input_syntax, reader_flags, serd_writer_sink(writer), stack_size);
@@ -275,15 +285,16 @@ main(int argc, char** argv)
serd_reader_free(reader);
serd_writer_free(writer);
serd_node_free(input_name);
- serd_byte_sink_free(byte_sink);
serd_env_free(env);
serd_node_free(base);
serd_world_free(world);
- if (fclose(stdout)) {
+ if (serd_byte_sink_close(byte_sink) || (!out_filename && fclose(stdout))) {
perror("serdi: write error");
st = SERD_ERR_UNKNOWN;
}
+ serd_byte_sink_free(byte_sink);
+
return (st > SERD_FAILURE) ? 1 : 0;
}
diff --git a/src/writer.c b/src/writer.c
index 0f54c0d3..b1404a91 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -14,6 +14,7 @@
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#include "byte_sink.h"
#include "env.h"
#include "node.h"
#include "sink.h"
@@ -131,8 +132,7 @@ struct SerdWriterImpl {
SerdURIView root_uri;
WriteContext* anon_stack;
size_t anon_stack_size;
- SerdWriteFunc write_func;
- void* stream;
+ SerdByteSink* byte_sink;
SerdErrorFunc error_func;
void* error_handle;
WriteContext context;
@@ -232,7 +232,7 @@ ctx(SerdWriter* writer, const SerdField field)
SERD_WARN_UNUSED_RESULT static inline size_t
sink(const void* buf, size_t len, SerdWriter* writer)
{
- const size_t written = writer->write_func(buf, 1, len, writer->stream);
+ const size_t written = serd_byte_sink_write(buf, len, writer->byte_sink);
if (written != len) {
if (errno) {
serd_world_errorf(writer->world,
@@ -1163,23 +1163,22 @@ serd_writer_new(SerdWorld* world,
SerdSyntax syntax,
SerdWriterFlags flags,
SerdEnv* env,
- SerdWriteFunc write_func,
- void* stream)
+ SerdByteSink* byte_sink)
{
const WriteContext context = WRITE_CONTEXT_NULL;
SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter));
- writer->world = world;
- writer->syntax = syntax;
- writer->flags = flags;
- writer->env = env;
- writer->root_node = NULL;
- writer->root_uri = SERD_URI_NULL;
+
+ writer->world = world;
+ writer->syntax = syntax;
+ writer->flags = flags;
+ writer->env = env;
+ writer->root_node = NULL;
+ writer->root_uri = SERD_URI_NULL;
writer->anon_stack =
(WriteContext*)calloc(anon_stack_capacity, sizeof(WriteContext));
- writer->write_func = write_func;
- writer->stream = stream;
- writer->context = context;
- writer->empty = true;
+ writer->byte_sink = byte_sink;
+ writer->context = context;
+ writer->empty = true;
writer->iface.handle = writer;
writer->iface.on_event = (SerdEventFunc)serd_writer_on_event;
diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c
index a3e99fa4..a4f57a0f 100644
--- a/test/test_reader_writer.c
+++ b/test/test_reader_writer.c
@@ -152,8 +152,11 @@ test_strict_write(void)
const char* path = "serd_strict_write_test.ttl";
FILE* fd = fopen(path, "wb");
SerdEnv* env = serd_env_new(SERD_EMPTY_STRING());
- SerdWriter* writer =
- serd_writer_new(world, SERD_TURTLE, 0, env, (SerdWriteFunc)fwrite, fd);
+
+ SerdByteSink* byte_sink =
+ serd_byte_sink_new_function((SerdWriteFunc)fwrite, fd, 1);
+
+ SerdWriter* writer = serd_writer_new(world, SERD_TURTLE, 0, env, byte_sink);
assert(fd);
assert(writer);
@@ -175,6 +178,7 @@ test_strict_write(void)
serd_node_free(p);
serd_writer_free(writer);
+ serd_byte_sink_free(byte_sink);
serd_env_free(env);
fclose(fd);
serd_world_free(world);
@@ -218,8 +222,12 @@ test_writer(const char* const path)
SerdWorld* world = serd_world_new();
- SerdWriter* writer = serd_writer_new(
- world, SERD_TURTLE, SERD_WRITE_LAX, env, (SerdWriteFunc)fwrite, fd);
+ SerdByteSink* byte_sink =
+ serd_byte_sink_new_function((SerdWriteFunc)fwrite, fd, 1);
+
+ SerdWriter* writer =
+ serd_writer_new(world, SERD_TURTLE, SERD_WRITE_LAX, env, byte_sink);
+
assert(writer);
serd_writer_chop_blank_prefix(writer, "tmp");
@@ -280,6 +288,7 @@ test_writer(const char* const path)
serd_node_free(hello);
serd_writer_free(writer);
+ serd_byte_sink_free(byte_sink);
serd_node_free(lit);
serd_node_free(o);
@@ -287,12 +296,10 @@ test_writer(const char* const path)
serd_node_free(l);
// Test buffer sink
- SerdBuffer buffer = {NULL, 0};
- SerdByteSink* byte_sink =
- serd_byte_sink_new((SerdWriteFunc)serd_buffer_sink, &buffer, 1);
+ SerdBuffer buffer = {NULL, 0};
- writer = serd_writer_new(
- world, SERD_TURTLE, 0, env, (SerdWriteFunc)serd_byte_sink_write, byte_sink);
+ byte_sink = serd_byte_sink_new_buffer(&buffer);
+ writer = serd_writer_new(world, SERD_TURTLE, 0, env, byte_sink);
SerdNode* const base =
serd_new_uri(SERD_STATIC_STRING("http://example.org/base"));
diff --git a/test/test_terse_write.c b/test/test_terse_write.c
index d6f24357..06d2e0ce 100644
--- a/test/test_terse_write.c
+++ b/test/test_terse_write.c
@@ -73,10 +73,11 @@ test(void)
serd_env_set_prefix(
env, SERD_STATIC_STRING("rdf"), SERD_STATIC_STRING(NS_RDF));
- SerdWriter* writer = serd_writer_new(
- world, SERD_TURTLE, 0, env, (SerdWriteFunc)serd_buffer_sink, &buffer);
+ SerdByteSink* const byte_sink = serd_byte_sink_new_buffer(&buffer);
+ SerdWriter* const writer =
+ serd_writer_new(world, SERD_TURTLE, 0, env, byte_sink);
- const SerdSink* sink = serd_writer_sink(writer);
+ const SerdSink* const sink = serd_writer_sink(writer);
// Simple lone list
serd_sink_write(sink, SERD_TERSE_S | SERD_LIST_S, l1, rdf_first, s1, NULL);
@@ -108,6 +109,7 @@ test(void)
serd_buffer_sink_finish(&buffer);
serd_writer_free(writer);
+ serd_byte_sink_free(byte_sink);
serd_nodes_free(nodes);
serd_env_free(env);
serd_world_free(world);