aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- doc/man/serd-pipe.1 11
-rwxr-xr-x scripts/serd_bench.py 2
-rw-r--r-- src/system.h 17
-rw-r--r-- test/meson.build 8
-rw-r--r-- tools/serd-pipe.c 35
5 files changed, 34 insertions, 39 deletions
diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1
index b8d2fd23..9b700068 100644
--- a/doc/man/serd-pipe.1
+++ b/doc/man/serd-pipe.1
@@ -8,8 +8,9 @@
.Nd read and write RDF data
.Sh SYNOPSIS
.Nm serd-pipe
-.Op Fl abefhlqtv
+.Op Fl afhlqtv
.Op Fl B Ar base
+.Op Fl b Ar bytes
.Op Fl c Ar prefix
.Op Fl i Ar syntax
.Op Fl k Ar bytes
@@ -45,9 +46,11 @@ or to provide a base URI for input from stdin or a string.
.It Fl a
Write ASCII output.
If this is enabled, all non-ASCII characters will be escaped, even if the output syntax allows them to be written in UTF-8.
-.It Fl b
-Bulk output writing.
-If this is enabled, output will be written a page at a time, rather than a byte at a time.
+.It Fl b Ar bytes
+I/O block size.
+This is the number of bytes in a file that will be read or written at once.
+The default is 4096, which should perform well in most cases.
+Note that this only applies to files; standard input and output are always processed one byte at a time.
.It Fl c Ar prefix
Chop
.Ar prefix
diff --git a/scripts/serd_bench.py b/scripts/serd_bench.py
index 1bd61855..d764cbda 100755
--- a/scripts/serd_bench.py
+++ b/scripts/serd_bench.py
@@ -271,7 +271,7 @@ example:
args = ap.parse_args(sys.argv[1:])
- progs = ["serd-pipe -b -f -i turtle -o turtle"] + args.run
+ progs = ["serd-pipe -f -i turtle -o turtle"] + args.run
min_n = int(args.max / args.steps)
max_n = args.max
step = min_n
diff --git a/src/system.h b/src/system.h
index ba5301fd..a0ec05dc 100644
--- a/src/system.h
+++ b/src/system.h
@@ -6,7 +6,6 @@
#include "zix/attributes.h"
-#include <stdint.h>
#include <stdio.h>
#define SERD_PAGE_SIZE 4096
@@ -31,20 +30,4 @@ serd_allocate_buffer(size_t size);
void
serd_free_aligned(void* ptr);
-/// Wrapper for getc that is compatible with SerdReadFunc
-static inline size_t
-serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream)
-{
- (void)size;
- (void)nmemb;
-
- const int c = getc((FILE*)stream);
- if (c == EOF) {
- *((uint8_t*)buf) = 0;
- return 0;
- }
- *((uint8_t*)buf) = (uint8_t)c;
- return 1;
-}
-
#endif // SERD_SRC_SYSTEM_H
diff --git a/test/meson.build b/test/meson.build
index 489a6ee5..e6b0fe5c 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -176,6 +176,10 @@ simple_command_tests = {
'bad': [
['-B', 'nonuriorpath'],
['-B'],
+ ['-b', '-1'],
+ ['-b', '1024junk'],
+ ['-b', '9223372036854775807'],
+ ['-b'],
['-c'],
['-fi'],
['-i', 'turtle'],
@@ -298,7 +302,7 @@ if is_variable('serd_pipe')
io_error_tests = {
'read_dir_bulk': [serd_src_root],
- 'read_dir_bytes': ['-e', serd_src_root],
+ 'read_dir_bytes': ['-b', '1', serd_src_root],
'read_dir_uri': ['file://@0@/'.format(serd_src_root)],
}
@@ -394,7 +398,7 @@ test_suites = {
ns_serdtest + 'good/',
'--',
'-a',
- '-b',
+ ['-b', '1'],
],
'fast': [
files('extra/perfect/manifest.ttl'),
diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c
index 3d095e2e..480afa6e 100644
--- a/tools/serd-pipe.c
+++ b/tools/serd-pipe.c
@@ -29,7 +29,6 @@
#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serd-pipe: " fmt, __VA_ARGS__)
#define MAX_DEPTH 128U
-#define SERD_PAGE_SIZE 4096U
static int
print_usage(const char* const name, const bool error)
@@ -39,9 +38,8 @@ print_usage(const char* const name, const bool error)
"Use - for INPUT to read from standard input.\n\n"
" -B BASE_URI Base URI.\n"
" -a Write ASCII output.\n"
- " -b Write output in blocks for performance.\n"
+ " -b BYTES I/O block size.\n"
" -c PREFIX Chop PREFIX from matching blank node IDs.\n"
- " -e Eat input one character at a time.\n"
" -f Fast and loose URI pass-through.\n"
" -h Display this help and exit.\n"
" -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"
@@ -85,7 +83,7 @@ read_file(SerdWorld* const world,
const size_t stack_size,
const char* const filename,
const char* const add_prefix,
- const bool bulk_read)
+ const size_t block_size)
{
SerdInputStream in = serd_open_tool_input(filename);
if (!in.stream) {
@@ -103,8 +101,7 @@ read_file(SerdWorld* const world,
serd_reader_add_blank_prefix(reader, add_prefix);
- SerdStatus st =
- serd_reader_start(reader, &in, NULL, bulk_read ? SERD_PAGE_SIZE : 1U);
+ SerdStatus st = serd_reader_start(reader, &in, NULL, block_size);
st = st ? st : serd_reader_read_document(reader);
@@ -124,10 +121,9 @@ main(int argc, char** argv)
SerdSyntax output_syntax = SERD_SYNTAX_EMPTY;
SerdReaderFlags reader_flags = 0;
SerdWriterFlags writer_flags = 0;
- bool bulk_read = true;
- bool bulk_write = false;
bool osyntax_set = false;
bool quiet = false;
+ size_t block_size = 4096U;
size_t stack_size = 1048576U;
const char* input_string = NULL;
const char* add_prefix = "";
@@ -152,10 +148,6 @@ main(int argc, char** argv)
if (opt == 'a') {
writer_flags |= SERD_WRITE_ASCII;
- } else if (opt == 'b') {
- bulk_write = true;
- } else if (opt == 'e') {
- bulk_read = false;
} else if (opt == 'f') {
writer_flags |= (SERD_WRITE_UNQUALIFIED | SERD_WRITE_UNRESOLVED);
} else if (opt == 'h') {
@@ -176,6 +168,19 @@ main(int argc, char** argv)
base = serd_new_uri(serd_string(argv[a]));
break;
+ } else if (opt == 'b') {
+ if (argv[a][o + 1] || ++a == argc) {
+ return missing_arg(prog, 'b');
+ }
+
+ char* endptr = NULL;
+ const long size = strtol(argv[a], &endptr, 10);
+ if (size < 1 || size == LONG_MAX || *endptr != '\0') {
+ SERDI_ERRORF("invalid block size `%s'\n", argv[a]);
+ return 1;
+ }
+ block_size = (size_t)size;
+ break;
} else if (opt == 'c') {
if (argv[a][o + 1] || ++a == argc) {
return missing_arg(prog, 'c');
@@ -291,8 +296,8 @@ main(int argc, char** argv)
return 1;
}
- SerdWriter* const writer = serd_writer_new(
- world, output_syntax, writer_flags, env, &out, bulk_write ? 4096U : 1U);
+ SerdWriter* const writer =
+ serd_writer_new(world, output_syntax, writer_flags, env, &out, block_size);
if (quiet) {
serd_world_set_error_func(world, quiet_error_func, NULL);
@@ -352,7 +357,7 @@ main(int argc, char** argv)
stack_size,
inputs[i],
n_inputs > 1 ? prefix : add_prefix,
- bulk_read))) {
+ block_size))) {
break;
}
}