From 94879f376f1d2b8fbb2322bf2a7dab5c3bb9e098 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 1 Dec 2023 18:31:30 -0500 Subject: [TESTED] Replace -b and -e options with a block size option This is more powerful, and reduces the number of command line options that almost nobody needs to care about. --- doc/man/serd-pipe.1 | 11 +++++++---- scripts/serd_bench.py | 2 +- src/system.h | 17 ----------------- test/meson.build | 8 ++++++-- tools/serd-pipe.c | 35 ++++++++++++++++++++--------------- 5 files changed, 34 insertions(+), 39 deletions(-) diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1 index b8d2fd23..9b700068 100644 --- a/doc/man/serd-pipe.1 +++ b/doc/man/serd-pipe.1 @@ -8,8 +8,9 @@ .Nd read and write RDF data .Sh SYNOPSIS .Nm serd-pipe -.Op Fl abefhlqtv +.Op Fl afhlqtv .Op Fl B Ar base +.Op Fl b Ar bytes .Op Fl c Ar prefix .Op Fl i Ar syntax .Op Fl k Ar bytes @@ -45,9 +46,11 @@ or to provide a base URI for input from stdin or a string. .It Fl a Write ASCII output. If this is enabled, all non-ASCII characters will be escaped, even if the output syntax allows them to be written in UTF-8. -.It Fl b -Bulk output writing. -If this is enabled, output will be written a page at a time, rather than a byte at a time. +.It Fl b Ar bytes +I/O block size. +This is the number of bytes in a file that will be read or written at once. +The default is 4096, which should perform well in most cases. +Note that this only applies to files; standard input and output are always processed one byte at a time. 
.It Fl c Ar prefix Chop .Ar prefix diff --git a/scripts/serd_bench.py b/scripts/serd_bench.py index 1bd61855..d764cbda 100755 --- a/scripts/serd_bench.py +++ b/scripts/serd_bench.py @@ -271,7 +271,7 @@ example: args = ap.parse_args(sys.argv[1:]) - progs = ["serd-pipe -b -f -i turtle -o turtle"] + args.run + progs = ["serd-pipe -f -i turtle -o turtle"] + args.run min_n = int(args.max / args.steps) max_n = args.max step = min_n diff --git a/src/system.h b/src/system.h index ba5301fd..a0ec05dc 100644 --- a/src/system.h +++ b/src/system.h @@ -6,7 +6,6 @@ #include "zix/attributes.h" -#include #include #define SERD_PAGE_SIZE 4096 @@ -31,20 +30,4 @@ serd_allocate_buffer(size_t size); void serd_free_aligned(void* ptr); -/// Wrapper for getc that is compatible with SerdReadFunc -static inline size_t -serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) -{ - (void)size; - (void)nmemb; - - const int c = getc((FILE*)stream); - if (c == EOF) { - *((uint8_t*)buf) = 0; - return 0; - } - *((uint8_t*)buf) = (uint8_t)c; - return 1; -} - #endif // SERD_SRC_SYSTEM_H diff --git a/test/meson.build b/test/meson.build index 489a6ee5..e6b0fe5c 100644 --- a/test/meson.build +++ b/test/meson.build @@ -176,6 +176,10 @@ simple_command_tests = { 'bad': [ ['-B', 'nonuriorpath'], ['-B'], + ['-b', '-1'], + ['-b', '1024junk'], + ['-b', '9223372036854775807'], + ['-b'], ['-c'], ['-fi'], ['-i', 'turtle'], @@ -298,7 +302,7 @@ if is_variable('serd_pipe') io_error_tests = { 'read_dir_bulk': [serd_src_root], - 'read_dir_bytes': ['-e', serd_src_root], + 'read_dir_bytes': ['-b', '1', serd_src_root], 'read_dir_uri': ['file://@0@/'.format(serd_src_root)], } @@ -394,7 +398,7 @@ test_suites = { ns_serdtest + 'good/', '--', '-a', - '-b', + ['-b', '1'], ], 'fast': [ files('extra/perfect/manifest.ttl'), diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c index 3d095e2e..480afa6e 100644 --- a/tools/serd-pipe.c +++ b/tools/serd-pipe.c @@ -29,7 +29,6 @@ #define SERDI_ERRORF(fmt, ...) 
fprintf(stderr, "serd-pipe: " fmt, __VA_ARGS__) #define MAX_DEPTH 128U -#define SERD_PAGE_SIZE 4096U static int print_usage(const char* const name, const bool error) @@ -39,9 +38,8 @@ print_usage(const char* const name, const bool error) "Use - for INPUT to read from standard input.\n\n" " -B BASE_URI Base URI.\n" " -a Write ASCII output.\n" - " -b Write output in blocks for performance.\n" + " -b BYTES I/O block size.\n" " -c PREFIX Chop PREFIX from matching blank node IDs.\n" - " -e Eat input one character at a time.\n" " -f Fast and loose URI pass-through.\n" " -h Display this help and exit.\n" " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n" @@ -85,7 +83,7 @@ read_file(SerdWorld* const world, const size_t stack_size, const char* const filename, const char* const add_prefix, - const bool bulk_read) + const size_t block_size) { SerdInputStream in = serd_open_tool_input(filename); if (!in.stream) { @@ -103,8 +101,7 @@ read_file(SerdWorld* const world, serd_reader_add_blank_prefix(reader, add_prefix); - SerdStatus st = - serd_reader_start(reader, &in, NULL, bulk_read ? SERD_PAGE_SIZE : 1U); + SerdStatus st = serd_reader_start(reader, &in, NULL, block_size); st = st ? 
st : serd_reader_read_document(reader); @@ -124,10 +121,9 @@ main(int argc, char** argv) SerdSyntax output_syntax = SERD_SYNTAX_EMPTY; SerdReaderFlags reader_flags = 0; SerdWriterFlags writer_flags = 0; - bool bulk_read = true; - bool bulk_write = false; bool osyntax_set = false; bool quiet = false; + size_t block_size = 4096U; size_t stack_size = 1048576U; const char* input_string = NULL; const char* add_prefix = ""; @@ -152,10 +148,6 @@ main(int argc, char** argv) if (opt == 'a') { writer_flags |= SERD_WRITE_ASCII; - } else if (opt == 'b') { - bulk_write = true; - } else if (opt == 'e') { - bulk_read = false; } else if (opt == 'f') { writer_flags |= (SERD_WRITE_UNQUALIFIED | SERD_WRITE_UNRESOLVED); } else if (opt == 'h') { @@ -176,6 +168,19 @@ main(int argc, char** argv) base = serd_new_uri(serd_string(argv[a])); break; + } else if (opt == 'b') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'b'); + } + + char* endptr = NULL; + const long size = strtol(argv[a], &endptr, 10); + if (size < 1 || size == LONG_MAX || *endptr != '\0') { + SERDI_ERRORF("invalid block size `%s'\n", argv[a]); + return 1; + } + block_size = (size_t)size; + break; } else if (opt == 'c') { if (argv[a][o + 1] || ++a == argc) { return missing_arg(prog, 'c'); @@ -291,8 +296,8 @@ main(int argc, char** argv) return 1; } - SerdWriter* const writer = serd_writer_new( - world, output_syntax, writer_flags, env, &out, bulk_write ? 4096U : 1U); + SerdWriter* const writer = + serd_writer_new(world, output_syntax, writer_flags, env, &out, block_size); if (quiet) { serd_world_set_error_func(world, quiet_error_func, NULL); @@ -352,7 +357,7 @@ main(int argc, char** argv) stack_size, inputs[i], n_inputs > 1 ? prefix : add_prefix, - bulk_read))) { + block_size))) { break; } } -- cgit v1.2.1