diff options
author | David Robillard <d@drobilla.net> | 2020-06-28 23:26:48 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:08 -0500 |
commit | a083c64f506175029280ff76defa0ad7d7ae2ea0 (patch) | |
tree | 5e666749e352659d225d9c45c60bee06bd2bfe5c | |
parent | 20eb7727954f9d8b7164146895904bbe595f5932 (diff) | |
download | serd-a083c64f506175029280ff76defa0ad7d7ae2ea0.tar.gz serd-a083c64f506175029280ff76defa0ad7d7ae2ea0.tar.bz2 serd-a083c64f506175029280ff76defa0ad7d7ae2ea0.zip |
Simplify input stream API
-rw-r--r-- | include/serd/input_stream.h | 90 | ||||
-rw-r--r-- | include/serd/reader.h | 37 | ||||
-rw-r--r-- | include/serd/serd.h | 1 | ||||
-rw-r--r-- | include/serd/stream.h | 32 | ||||
-rw-r--r-- | meson.build | 2 | ||||
-rw-r--r-- | src/byte_source.c | 115 | ||||
-rw-r--r-- | src/byte_source.h | 61 | ||||
-rw-r--r-- | src/input_stream.c | 139 | ||||
-rw-r--r-- | src/read_nquads.c | 2 | ||||
-rw-r--r-- | src/read_ntriples.c | 2 | ||||
-rw-r--r-- | src/read_trig.c | 2 | ||||
-rw-r--r-- | src/read_turtle.c | 4 | ||||
-rw-r--r-- | src/reader.c | 105 | ||||
-rw-r--r-- | src/reader.h | 11 | ||||
-rw-r--r-- | src/serdi.c | 27 | ||||
-rw-r--r-- | src/world.c | 27 | ||||
-rw-r--r-- | src/world.h | 6 | ||||
-rw-r--r-- | test/meson.build | 11 | ||||
-rw-r--r-- | test/test_overflow.c | 9 | ||||
-rw-r--r-- | test/test_reader.c | 166 | ||||
-rw-r--r-- | test/test_reader_writer.c | 20 |
21 files changed, 561 insertions, 308 deletions
diff --git a/include/serd/input_stream.h b/include/serd/input_stream.h new file mode 100644 index 00000000..cc63e694 --- /dev/null +++ b/include/serd/input_stream.h @@ -0,0 +1,90 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_INPUT_STREAM_H +#define SERD_INPUT_STREAM_H + +#include "serd/attributes.h" +#include "serd/status.h" +#include "serd/stream.h" +#include "zix/attributes.h" + +SERD_BEGIN_DECLS + +/** + @defgroup serd_input_stream Input Streams + @ingroup serd_reading_writing + + An input stream is used for reading input as a raw stream of bytes. It is + compatible with standard C `FILE` streams, but allows different functions to + be provided for things like reading from a buffer or a socket. + + @{ +*/ + +/// An input stream that produces bytes +typedef struct { + void* ZIX_NULLABLE stream; ///< Opaque parameter for functions + SerdReadFunc ZIX_NONNULL read; ///< Read bytes from input + SerdErrorFunc ZIX_NULLABLE error; ///< Stream error accessor + SerdCloseFunc ZIX_NULLABLE close; ///< Close input +} SerdInputStream; + +/** + Open a stream that reads from a provided function. + + @param read_func Function to read input. + @param error_func Function used to detect errors. + @param close_func Function to close the stream after reading is done. + @param stream Opaque stream parameter for functions. + + @return An opened input stream, or all zeros on error. +*/ +SERD_CONST_API SerdInputStream +serd_open_input_stream(SerdReadFunc ZIX_NONNULL read_func, + SerdErrorFunc ZIX_NONNULL error_func, + SerdCloseFunc ZIX_NULLABLE close_func, + void* ZIX_NULLABLE stream); + +/** + Open a stream that reads from a string. + + The string pointer that position points to must remain valid until the + stream is closed. This pointer serves as the internal stream state and will + be mutated as the stream is used. + + @param position Pointer to a valid string pointer for use as stream state. + @return An opened input stream, or all zeros on error. +*/ +SERD_CONST_API SerdInputStream +serd_open_input_string(const char* ZIX_NONNULL* ZIX_NONNULL position); + +/** + Open a stream that reads from a file. + + An arbitrary `FILE*` can be used with serd_open_input_stream() as well, this + convenience function opens the file properly for reading with serd, and sets + flags for optimized I/O if possible. + + @param path Path of file to open and read from. +*/ +SERD_API SerdInputStream +serd_open_input_file(const char* ZIX_NONNULL path); + +/** + Close an input stream. + + This will call the close function, and reset the stream internally so that + no further reads can be made. For convenience, this is safe to call on + NULL, and safe to call several times on the same input. +*/ +SERD_API SerdStatus +serd_close_input(SerdInputStream* ZIX_NULLABLE input); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_INPUT_STREAM_H diff --git a/include/serd/reader.h b/include/serd/reader.h index c79d6a44..4c669342 100644 --- a/include/serd/reader.h +++ b/include/serd/reader.h @@ -5,15 +5,14 @@ #define SERD_READER_H #include "serd/attributes.h" +#include "serd/input_stream.h" #include "serd/node.h" #include "serd/sink.h" #include "serd/status.h" -#include "serd/stream.h" #include "serd/syntax.h" #include "serd/world.h" #include "zix/attributes.h" -#include <stdbool.h> #include <stddef.h> #include <stdint.h> @@ -56,31 +55,23 @@ SERD_API void serd_reader_add_blank_prefix(SerdReader* ZIX_NONNULL reader, const char* ZIX_NULLABLE prefix); -/// Prepare to read from the file at a local file `uri` -SERD_API SerdStatus -serd_reader_start_file(SerdReader* ZIX_NONNULL reader, - const char* ZIX_NONNULL uri, - bool bulk); - /** - Prepare to read from a stream. + Prepare to read some input. - The `read_func` is guaranteed to only be called for `page_size` elements - with size 1 (i.e. `page_size` bytes). + This sets up the reader to read from the given input, but will not read any + bytes from it. This should be followed by serd_reader_read_chunk() or + serd_reader_read_document() to actually read the input. + + @param reader The reader. + @param input An opened input stream to read from. + @param input_name The name of the input stream for error messages. + @param block_size The number of bytes to read from the stream at once. */ SERD_API SerdStatus -serd_reader_start_stream(SerdReader* ZIX_NONNULL reader, - SerdReadFunc ZIX_NONNULL read_func, - SerdStreamErrorFunc ZIX_NONNULL error_func, - void* ZIX_NONNULL stream, - const SerdNode* ZIX_NULLABLE name, - size_t page_size); - -/// Prepare to read from a string -SERD_API SerdStatus -serd_reader_start_string(SerdReader* ZIX_NONNULL reader, - const char* ZIX_NONNULL utf8, - const SerdNode* ZIX_NULLABLE name); +serd_reader_start(SerdReader* ZIX_NONNULL reader, + SerdInputStream* ZIX_NONNULL input, + const SerdNode* ZIX_NULLABLE input_name, + size_t block_size); /** Read a single "chunk" of data during an incremental read. diff --git a/include/serd/serd.h b/include/serd/serd.h index 5b13f676..58c8e7ec 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -78,6 +78,7 @@ @{ */ +#include "serd/input_stream.h" #include "serd/reader.h" #include "serd/stream.h" #include "serd/writer.h" diff --git a/include/serd/stream.h b/include/serd/stream.h index 4b3582f5..f59e2c9d 100644 --- a/include/serd/stream.h +++ b/include/serd/stream.h @@ -33,10 +33,19 @@ SERD_BEGIN_DECLS typedef int (*SerdStreamErrorFunc)(void* ZIX_NONNULL stream); /** + Function to close an I/O stream. + + Identical semantics to `fclose`. + + @return Non-zero if `stream` has encountered an error. +*/ +typedef int (*SerdStreamCloseFunc)(void* ZIX_NONNULL stream); + +/** Function for reading input bytes from a stream. This has identical semantics to `fread`, but may set `errno` for more - informative error reporting than supported by #SerdStreamErrorFunc. + informative error reporting than supported by #SerdErrorFunc. @param buf Output buffer. @param size Size of a single element of data in bytes (always 1). @@ -67,6 +76,27 @@ typedef size_t (*SerdWriteFunc)(const void* ZIX_NONNULL buf, void* ZIX_NONNULL stream); /** + Function for detecting I/O stream errors. + + This has identical semantics to `ferror`. + + @return Non-zero if `stream` has encountered an error. +*/ +typedef int (*SerdErrorFunc)(void* ZIX_NONNULL stream); + +/** + Function for closing an I/O stream. + + This has identical semantics to `fclose`. Note that when writing, this may + flush the stream which can cause errors, including errors caused by previous + writes that appeared successful at the time. Therefore it is necessary to + check the return value of this function to properly detect write errors. + + @return Non-zero if `stream` has encountered an error. +*/ +typedef int (*SerdCloseFunc)(void* ZIX_NONNULL stream); + +/** @} */ diff --git a/meson.build b/meson.build index 3a7a3285..cc173f44 100644 --- a/meson.build +++ b/meson.build @@ -133,6 +133,7 @@ c_headers = files( 'include/serd/env.h', 'include/serd/error.h', 'include/serd/event.h', + 'include/serd/input_stream.h', 'include/serd/memory.h', 'include/serd/node.h', 'include/serd/reader.h', @@ -156,6 +157,7 @@ sources = files( 'src/byte_source.c', 'src/caret.c', 'src/env.c', + 'src/input_stream.c', 'src/node.c', 'src/read_nquads.c', 'src/read_ntriples.c', diff --git a/src/byte_source.c b/src/byte_source.c index 7a839875..e4810b60 100644 --- a/src/byte_source.c +++ b/src/byte_source.c @@ -11,120 +11,95 @@ #include <assert.h> #include <stdbool.h> #include <stdint.h> +#include <stdlib.h> #include <string.h> SerdStatus serd_byte_source_page(SerdByteSource* const source) { uint8_t* const buf = - (source->page_size > 1 ? source->file_buf : &source->read_byte); + (source->block_size > 1 ? source->block : &source->read_byte); const size_t n_read = - source->read_func(buf, 1, source->page_size, source->stream); + source->in->read(buf, 1, source->block_size, source->in->stream); source->buf_size = n_read; source->read_head = 0; source->eof = false; - if (n_read < source->page_size) { + if (n_read < source->block_size) { buf[n_read] = '\0'; if (n_read == 0) { source->eof = true; - return (source->error_func(source->stream) ? SERD_BAD_STREAM - : SERD_FAILURE); + return (source->in->error(source->in->stream) ? SERD_BAD_STREAM + : SERD_FAILURE); } } return SERD_SUCCESS; } -SerdStatus -serd_byte_source_open_source(SerdByteSource* const source, - const SerdReadFunc read_func, - const SerdStreamErrorFunc error_func, - const SerdStreamCloseFunc close_func, - void* const stream, - const SerdNode* const name, - const size_t page_size) +static void +serd_byte_source_init_buffer(SerdByteSource* const source) { - assert(read_func); - assert(error_func); - assert(page_size > 0); - - memset(source, '\0', sizeof(*source)); - source->read_func = read_func; - source->error_func = error_func; - source->close_func = close_func; - source->stream = stream; - source->page_size = page_size; - source->buf_size = page_size; - source->name = serd_node_copy(name); - source->caret.document = source->name; - source->caret.line = 1U; - source->caret.col = 1U; - source->from_stream = true; - - if (page_size > 1) { - source->file_buf = (uint8_t*)serd_allocate_buffer(page_size); - source->read_buf = source->file_buf; - memset(source->file_buf, '\0', page_size); + if (source->block_size > 1) { + source->block = (uint8_t*)serd_allocate_buffer(source->block_size); + source->read_buf = source->block; + memset(source->block, '\0', source->block_size); } else { source->read_buf = &source->read_byte; } - - return SERD_SUCCESS; } -SerdStatus -serd_byte_source_prepare(SerdByteSource* const source) +SerdByteSource* +serd_byte_source_new_input(SerdInputStream* const input, + const SerdNode* const name, + const size_t block_size) { - if (source->page_size == 0) { - return SERD_FAILURE; - } + assert(input); - source->prepared = true; - - if (source->from_stream) { - return (source->page_size > 1 ? serd_byte_source_page(source) - : serd_byte_source_advance(source)); + if (!block_size || !input->stream) { + return NULL; } - return SERD_SUCCESS; -} - -SerdStatus -serd_byte_source_open_string(SerdByteSource* const source, - const char* const utf8, - const SerdNode* const name) -{ - memset(source, '\0', sizeof(*source)); + SerdByteSource* source = (SerdByteSource*)calloc(1, sizeof(SerdByteSource)); - source->page_size = 1; source->name = - name ? serd_node_copy(name) : serd_new_string(serd_string("string")); + name ? serd_node_copy(name) : serd_new_string(serd_string("input")); - source->page_size = 1U; - source->read_buf = (const uint8_t*)utf8; + source->in = input; + source->block_size = block_size; + source->buf_size = block_size; source->caret.document = source->name; source->caret.line = 1U; source->caret.col = 1U; - return SERD_SUCCESS; + serd_byte_source_init_buffer(source); + + return source; } -SerdStatus -serd_byte_source_close(SerdByteSource* const source) +void +serd_byte_source_free(SerdByteSource* const source) { - SerdStatus st = SERD_SUCCESS; - if (source->close_func) { - st = source->close_func(source->stream) ? SERD_BAD_STREAM : SERD_SUCCESS; + if (source) { + if (source->block_size > 1) { + serd_free_aligned(source->block); + } + + serd_node_free(source->name); + free(source); } +} + +SerdStatus +serd_byte_source_prepare(SerdByteSource* const source) +{ + source->prepared = true; - if (source->page_size > 1) { - serd_free_aligned(source->file_buf); + if (source->block_size > 1) { + return serd_byte_source_page(source); } - serd_node_free(source->name); - memset(source, '\0', sizeof(*source)); - return st; + return serd_byte_source_advance(source); } diff --git a/src/byte_source.h b/src/byte_source.h index d40012ea..3a16a7c6 100644 --- a/src/byte_source.h +++ b/src/byte_source.h @@ -7,9 +7,9 @@ #include "caret.h" // IWYU pragma: keep #include "serd/caret.h" +#include "serd/input_stream.h" #include "serd/node.h" #include "serd/status.h" -#include "serd/stream.h" #include "zix/attributes.h" #include <assert.h> @@ -17,42 +17,27 @@ #include <stddef.h> #include <stdint.h> -typedef int (*SerdStreamCloseFunc)(void*); - typedef struct { - SerdReadFunc read_func; ///< Read function (e.g. fread) - SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror) - SerdStreamCloseFunc close_func; ///< Function for closing stream - void* stream; ///< Stream (e.g. FILE) - size_t page_size; ///< Number of bytes to read at a time - size_t buf_size; ///< Number of bytes in file_buf - SerdNode* name; ///< Name of stream (referenced by cur) - SerdCaret caret; ///< Caret for error reporting - uint8_t* file_buf; ///< Buffer iff reading pages from a file - const uint8_t* read_buf; ///< Pointer to file_buf or read_byte - size_t read_head; ///< Offset into read_buf - uint8_t read_byte; ///< 1-byte 'buffer' used when not paging - bool from_stream; ///< True iff reading from `stream` - bool prepared; ///< True iff prepared for reading - bool eof; ///< True iff end of file reached + SerdInputStream* in; ///< Input stream to read from + size_t block_size; ///< Number of bytes to read at a time + size_t buf_size; ///< Number of bytes in block + SerdNode* name; ///< Name of stream (for caret) + SerdCaret caret; ///< File position for error reporting + uint8_t* block; ///< Buffer if reading blocks + const uint8_t* read_buf; ///< Pointer to block or read_byte + size_t read_head; ///< Offset into read_buf + uint8_t read_byte; ///< 1-byte 'buffer' if reading bytes + bool prepared; ///< True iff prepared for reading + bool eof; ///< True iff end of file reached } SerdByteSource; -SerdStatus -serd_byte_source_open_string(SerdByteSource* source, - const char* utf8, - const SerdNode* name); - -SerdStatus -serd_byte_source_open_source(SerdByteSource* source, - SerdReadFunc read_func, - SerdStreamErrorFunc error_func, - SerdStreamCloseFunc close_func, - void* stream, - const SerdNode* name, - size_t page_size); +SerdByteSource* +serd_byte_source_new_input(SerdInputStream* input, + const SerdNode* name, + size_t block_size); -SerdStatus -serd_byte_source_close(SerdByteSource* source); +void +serd_byte_source_free(SerdByteSource* source); SerdStatus serd_byte_source_prepare(SerdByteSource* source); @@ -74,6 +59,8 @@ serd_byte_source_advance(SerdByteSource* source) const bool was_eof = source->eof; switch (serd_byte_source_peek(source)) { + case '\0': + break; case '\n': ++source->caret.line; source->caret.col = 0; @@ -82,12 +69,8 @@ serd_byte_source_advance(SerdByteSource* source) ++source->caret.col; } - if (source->from_stream) { - if (++source->read_head >= source->buf_size) { - st = serd_byte_source_page(source); - } - } else if (!source->eof) { - source->eof = source->read_buf[++source->read_head] == '\0'; + if (++source->read_head >= source->buf_size) { + st = serd_byte_source_page(source); } return (was_eof && source->eof) ? SERD_FAILURE : st; diff --git a/src/input_stream.c b/src/input_stream.c new file mode 100644 index 00000000..24db04f6 --- /dev/null +++ b/src/input_stream.c @@ -0,0 +1,139 @@ +// Copyright 2011-2021 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#include "serd_config.h" + +#include "serd/input_stream.h" +#include "serd/status.h" +#include "serd/stream.h" + +#include <sys/stat.h> + +#if USE_POSIX_FADVISE && USE_FILENO +# include <fcntl.h> +#endif + +// IWYU pragma: no_include <features.h> + +#include <assert.h> +#include <stdbool.h> +#include <stdio.h> + +static size_t +serd_string_read(void* const buf, + const size_t size, + const size_t nmemb, + void* const stream) +{ + const char** position = (const char**)stream; + + size_t n_read = 0U; + const size_t len = size * nmemb; + while (n_read < len && **position) { + ((char*)buf)[n_read++] = **position; + + ++(*position); + } + + return n_read; +} + +static int +serd_string_error(void* const stream) +{ + (void)stream; + return 0; +} + +static int +serd_string_close(void* const stream) +{ + (void)stream; + return 0; +} + +SerdInputStream +serd_open_input_stream(const SerdReadFunc read_func, + const SerdErrorFunc error_func, + const SerdCloseFunc close_func, + void* const stream) +{ + assert(read_func); + assert(error_func); + + SerdInputStream input = {stream, read_func, error_func, close_func}; + return input; +} + +static bool +is_directory(const char* const path) +{ +#ifdef _MSC_VER + struct stat st; + return !stat(path, &st) && (st.st_mode & _S_IFDIR); +#else + struct stat st; + return !stat(path, &st) && S_ISDIR(st.st_mode); +#endif +} + +SerdInputStream +serd_open_input_string(const char** const position) +{ + assert(position); + assert(*position); + + const SerdInputStream input = { + position, serd_string_read, serd_string_error, serd_string_close}; + + return input; +} + +SerdInputStream +serd_open_input_file(const char* const path) +{ + assert(path); + + SerdInputStream input = {NULL, NULL, NULL, NULL}; + if (is_directory(path)) { + return input; + } + +#ifdef __GLIBC__ + FILE* const file = fopen(path, "rbe"); +#else + FILE* const file = fopen(path, "rb"); +#endif + + if (!file) { + return input; + } + +#if USE_POSIX_FADVISE && USE_FILENO + (void)posix_fadvise(fileno(file), 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + + input.stream = file; + input.read = (SerdReadFunc)fread; + input.error = (SerdErrorFunc)ferror; + input.close = (SerdCloseFunc)fclose; + + return input; +} + +SerdStatus +serd_close_input(SerdInputStream* const input) +{ + int ret = 0; + + if (input) { + if (input->close && input->stream) { + ret = input->close(input->stream); + input->stream = NULL; + } + + input->stream = NULL; + } + + return ret ? SERD_BAD_STREAM : SERD_SUCCESS; +} diff --git a/src/read_nquads.c b/src/read_nquads.c index b4e200d4..6f0120d1 100644 --- a/src/read_nquads.c +++ b/src/read_nquads.c @@ -44,7 +44,7 @@ read_nquads_statement(SerdReader* const reader) } // Preserve the caret for error reporting and read object - SerdCaret orig_caret = reader->source.caret; + SerdCaret orig_caret = reader->source->caret; if ((st = read_nt_object(reader, &ctx.object, &ate_dot)) || (st = skip_horizontal_whitespace(reader))) { return st; diff --git a/src/read_ntriples.c b/src/read_ntriples.c index df9a0e91..da726658 100644 --- a/src/read_ntriples.c +++ b/src/read_ntriples.c @@ -632,7 +632,7 @@ read_triple(SerdReader* const reader) } // Preserve the caret for error reporting and read object - SerdCaret orig_caret = reader->source.caret; + SerdCaret orig_caret = reader->source->caret; if ((st = read_nt_object(reader, &ctx.object, &ate_dot)) || (st = skip_horizontal_whitespace(reader))) { return st; diff --git a/src/read_trig.c b/src/read_trig.c index 45ac39a0..d710eaeb 100644 --- a/src/read_trig.c +++ b/src/read_trig.c @@ -184,7 +184,7 @@ read_trig_statement(SerdReader* const reader) SerdStatus read_trigDoc(SerdReader* const reader) { - while (!reader->source.eof) { + while (!reader->source->eof) { const size_t orig_stack_size = reader->stack.size; const SerdStatus st = read_trig_statement(reader); diff --git a/src/read_turtle.c b/src/read_turtle.c index 19a47ebf..828818ba 100644 --- a/src/read_turtle.c +++ b/src/read_turtle.c @@ -49,7 +49,7 @@ read_whitespace(SerdReader* const reader) case '\n': case '\r': case ' ': - return serd_byte_source_advance(&reader->source); + return serd_byte_source_advance(reader->source); case '#': return read_comment(reader); default: @@ -1019,7 +1019,7 @@ read_turtle_statement(SerdReader* const reader) SerdStatus read_turtleDoc(SerdReader* const reader) { - while (!reader->source.eof) { + while (!reader->source->eof) { const size_t orig_stack_size = reader->stack.size; const SerdStatus st = read_turtle_statement(reader); diff --git a/src/reader.c b/src/reader.c index d7c87c47..bf6d697f 100644 --- a/src/reader.c +++ b/src/reader.c @@ -16,12 +16,9 @@ #include "system.h" #include "world.h" -#include "serd/stream.h" -#include "serd/string_view.h" -#include "serd/uri.h" +#include "serd/input_stream.h" #include <assert.h> -#include <errno.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> @@ -35,7 +32,7 @@ r_err(SerdReader* const reader, const SerdStatus st, const char* const fmt, ...) { va_list args; // NOLINT(cppcoreguidelines-init-variables) va_start(args, fmt); - const SerdError e = {st, &reader->source.caret, fmt, &args}; + const SerdError e = {st, &reader->source->caret, fmt, &args}; serd_world_error(reader->world, &e); va_end(args); return st; @@ -90,7 +87,8 @@ tolerate_status(const SerdReader* const reader, const SerdStatus status) } if (status == SERD_BAD_STREAM || status == SERD_BAD_STACK || - status == SERD_BAD_WRITE || status == SERD_NO_DATA) { + status == SERD_BAD_WRITE || status == SERD_NO_DATA || + status == SERD_BAD_CALL) { return false; } @@ -174,7 +172,7 @@ emit_statement(SerdReader* const reader, serd_node_zero_pad(o); const SerdStatement statement = {{ctx.subject, ctx.predicate, o, ctx.graph}, - &reader->source.caret}; + &reader->source->caret}; const SerdStatus st = serd_sink_write_statement(reader->sink, *ctx.flags, &statement); @@ -188,7 +186,11 @@ serd_reader_read_document(SerdReader* const reader) { assert(reader); - if (reader->syntax != SERD_SYNTAX_EMPTY && !reader->source.prepared) { + if (!reader->source) { + return SERD_BAD_CALL; + } + + if (reader->syntax != SERD_SYNTAX_EMPTY && !reader->source->prepared) { SerdStatus st = serd_reader_prepare(reader); if (st) { return st; @@ -256,7 +258,9 @@ serd_reader_free(SerdReader* const reader) return; } - serd_reader_finish(reader); + if (reader->source) { + serd_reader_finish(reader); + } serd_free_aligned(reader->stack.buf); free(reader->bprefix); @@ -283,12 +287,12 @@ serd_reader_add_blank_prefix(SerdReader* const reader, const char* const prefix) static SerdStatus skip_bom(SerdReader* const me) { - if (serd_byte_source_peek(&me->source) == 0xEF) { - if (serd_byte_source_advance(&me->source) || - serd_byte_source_peek(&me->source) != 0xBB || - serd_byte_source_advance(&me->source) || - serd_byte_source_peek(&me->source) != 0xBF || - serd_byte_source_advance(&me->source)) { + if (serd_byte_source_peek(me->source) == 0xEF) { + if (serd_byte_source_advance(me->source) || + serd_byte_source_peek(me->source) != 0xBB || + serd_byte_source_advance(me->source) || + serd_byte_source_peek(me->source) != 0xBF || + serd_byte_source_advance(me->source)) { r_err(me, SERD_BAD_SYNTAX, "corrupt byte order mark"); return SERD_BAD_SYNTAX; } @@ -298,64 +302,31 @@ skip_bom(SerdReader* const me) } SerdStatus -serd_reader_start_stream(SerdReader* const reader, - const SerdReadFunc read_func, - const SerdStreamErrorFunc error_func, - void* const stream, - const SerdNode* const name, - const size_t page_size) +serd_reader_start(SerdReader* const reader, + SerdInputStream* const input, + const SerdNode* const input_name, + const size_t block_size) { assert(reader); + assert(input); - return serd_byte_source_open_source( - &reader->source, read_func, error_func, NULL, stream, name, page_size); -} - -SerdStatus -serd_reader_start_file(SerdReader* reader, const char* uri, bool bulk) -{ - char* const path = serd_parse_file_uri(uri, NULL); - if (!path) { - return SERD_BAD_ARG; - } - - FILE* fd = serd_world_fopen(reader->world, path, "rb"); - free(path); - if (!fd) { - return SERD_BAD_STREAM; + if (reader->source) { + return SERD_BAD_CALL; } - SerdNode* const name = serd_new_uri(serd_string(uri)); - const SerdStatus st = serd_byte_source_open_source( - &reader->source, - bulk ? (SerdReadFunc)fread : serd_file_read_byte, - (SerdStreamErrorFunc)ferror, - (SerdStreamCloseFunc)fclose, - fd, - name, - bulk ? SERD_PAGE_SIZE : 1U); - serd_node_free(name); - return st; -} + reader->source = serd_byte_source_new_input(input, input_name, block_size); -SerdStatus -serd_reader_start_string(SerdReader* const reader, - const char* const utf8, - const SerdNode* const name) -{ - return serd_byte_source_open_string(&reader->source, utf8, name); + return reader->source ? SERD_SUCCESS : SERD_BAD_ARG; } static SerdStatus serd_reader_prepare(SerdReader* const reader) { - SerdStatus st = serd_byte_source_prepare(&reader->source); + SerdStatus st = serd_byte_source_prepare(reader->source); if (st == SERD_SUCCESS) { st = skip_bom(reader); } else if (st == SERD_FAILURE) { - reader->source.eof = true; - } else { - r_err(reader, st, "error preparing read: %s", strerror(errno)); + reader->source->eof = true; } return st; } @@ -366,15 +337,19 @@ serd_reader_read_chunk(SerdReader* const reader) assert(reader); SerdStatus st = SERD_SUCCESS; - if (!reader->source.prepared) { + if (!reader->source) { + return SERD_BAD_CALL; + } + + if (!reader->source->prepared) { st = serd_reader_prepare(reader); - } else if (reader->source.eof) { - st = serd_byte_source_advance(&reader->source); + } else if (reader->source->eof) { + st = serd_byte_source_advance(reader->source); } if (peek_byte(reader) == 0) { // Skip leading null byte, for reading from a null-delimited socket - serd_byte_source_advance(&reader->source); + serd_byte_source_advance(reader->source); return SERD_FAILURE; } @@ -402,5 +377,7 @@ serd_reader_finish(SerdReader* const reader) { assert(reader); - return serd_byte_source_close(&reader->source); + serd_byte_source_free(reader->source); + reader->source = NULL; + return SERD_SUCCESS; } diff --git a/src/reader.h b/src/reader.h index 17c0c49d..73647307 100644 --- a/src/reader.h +++ b/src/reader.h @@ -41,11 +41,10 @@ struct SerdReaderImpl { SerdNode* rdf_first; SerdNode* rdf_rest; SerdNode* rdf_nil; - SerdByteSource source; + SerdByteSource* source; SerdStack stack; SerdSyntax syntax; unsigned next_id; - uint8_t* buf; char* bprefix; size_t bprefix_len; bool strict; ///< True iff strict parsing @@ -93,7 +92,7 @@ emit_statement(SerdReader* reader, ReadContext ctx, SerdNode* o); static inline int peek_byte(SerdReader* reader) { - SerdByteSource* source = &reader->source; + SerdByteSource* source = reader->source; return source->eof ? EOF : (int)source->read_buf[source->read_head]; } @@ -105,7 +104,7 @@ skip_byte(SerdReader* reader, const int byte) assert(peek_byte(reader) == byte); - return serd_byte_source_advance(&reader->source); + return serd_byte_source_advance(reader->source); } static inline int @@ -114,7 +113,7 @@ eat_byte(SerdReader* const reader) const int c = peek_byte(reader); if (c != EOF) { - serd_byte_source_advance(&reader->source); + serd_byte_source_advance(reader->source); } return c; @@ -127,7 +126,7 @@ eat_byte_safe(SerdReader* reader, const int byte) assert(peek_byte(reader) == byte); - serd_byte_source_advance(&reader->source); + serd_byte_source_advance(reader->source); return byte; } diff --git a/src/serdi.c b/src/serdi.c index c03d3102..89d7ebd9 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -1,10 +1,9 @@ // Copyright 2011-2023 David Robillard <d@drobilla.net> // SPDX-License-Identifier: ISC -#include "system.h" - #include "serd/env.h" #include "serd/error.h" +#include "serd/input_stream.h" #include "serd/node.h" #include "serd/reader.h" #include "serd/status.h" @@ -268,24 +267,26 @@ main(int argc, char** argv) serd_writer_chop_blank_prefix(writer, chop_prefix); serd_reader_add_blank_prefix(reader, add_prefix); - SerdStatus st = SERD_SUCCESS; - SerdNode* input_name = NULL; + SerdStatus st = SERD_SUCCESS; + SerdNode* input_name = NULL; + const char* position = NULL; + SerdInputStream in = {NULL, NULL, NULL, NULL}; + size_t block_size = 1U; if (from_string) { + position = input; + in = serd_open_input_string(&position); input_name = serd_new_string(serd_string("string")); - st = serd_reader_start_string(reader, input, input_name); } else if (from_stdin) { + in = serd_open_input_stream( + (SerdReadFunc)fread, (SerdErrorFunc)ferror, (SerdCloseFunc)fclose, stdin); input_name = serd_new_string(serd_string("stdin")); - st = serd_reader_start_stream(reader, - serd_file_read_byte, - (SerdStreamErrorFunc)ferror, - stdin, - input_name, - 1); } else { - st = serd_reader_start_file(reader, input, bulk_read); + block_size = bulk_read ? 4096U : 1U; + in = serd_open_input_file(input); + input_name = serd_new_string(serd_string(input)); } - if (!st) { + if (!(st = serd_reader_start(reader, &in, input_name, block_size))) { st = serd_reader_read_document(reader); } diff --git a/src/world.c b/src/world.c index cb4ba02d..9f10de49 100644 --- a/src/world.c +++ b/src/world.c @@ -5,44 +5,17 @@ #include "caret.h" #include "node.h" -#include "serd_config.h" -#include "system.h" #include "serd/node.h" #include "serd/string_view.h" -#if defined(USE_POSIX_FADVISE) -# include <fcntl.h> -#endif - #include <assert.h> -#include <errno.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -FILE* -serd_world_fopen(SerdWorld* world, const char* path, const char* mode) -{ - FILE* fd = fopen(path, mode); - if (!fd) { - char message[1024] = {0}; - serd_system_strerror(errno, message, sizeof(message)); - - serd_world_errorf( - world, SERD_BAD_STREAM, "failed to open file %s (%s)\n", path, message); - return NULL; - } - -#if USE_POSIX_FADVISE && USE_FILENO - (void)posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL); -#endif - - return fd; -} - SerdStatus serd_world_error(const SerdWorld* const world, const SerdError* const e) { diff --git a/src/world.h b/src/world.h index 83393a5a..56db46cc 100644 --- a/src/world.h +++ b/src/world.h @@ -9,8 +9,8 @@ #include "serd/status.h" #include "serd/world.h" +#include <stdarg.h> #include <stdint.h> -#include <stdio.h> struct SerdWorldImpl { SerdLimits limits; @@ -20,10 +20,6 @@ struct SerdWorldImpl { SerdNode* blank_node; }; -/// Open a file configured for fast sequential reading -FILE* -serd_world_fopen(SerdWorld* world, const char* path, const char* mode); - SerdStatus serd_world_error(const SerdWorld* world, const SerdError* e); diff --git a/test/meson.build b/test/meson.build index e0aea675..496296c1 100644 --- a/test/meson.build +++ b/test/meson.build @@ -289,6 +289,17 @@ if is_variable('serdi') env: test_env, suite: 'io', ) + + if host_machine.system() == 'linux' + test( + 'unreadable', + serdi, + args: ['/sys/bus/pci/rescan'], + env: test_env, + should_fail: true, + suite: 'io', + ) + endif endif ########################### diff --git a/test/test_overflow.c b/test/test_overflow.c index 6e018033..086d353b 100644 --- a/test/test_overflow.c +++ b/test/test_overflow.c @@ -27,8 +27,15 @@ test_size(SerdWorld* const world, return SERD_BAD_STACK; } - serd_reader_start_string(reader, str, NULL); + SerdNode* string_name = serd_new_string(serd_string("string")); + const char* position = str; + SerdInputStream in = serd_open_input_string(&position); + serd_reader_start(reader, &in, string_name, 1); + const SerdStatus st = serd_reader_read_document(reader); + + serd_close_input(&in); + serd_node_free(string_name); serd_reader_free(reader); serd_sink_free(sink); diff --git a/test/test_reader.c b/test/test_reader.c index f33c3429..dd090d83 100644 --- a/test/test_reader.c +++ b/test/test_reader.c @@ -4,6 +4,7 @@ #undef NDEBUG #include "serd/event.h" +#include "serd/input_stream.h" #include "serd/reader.h" #include "serd/sink.h" #include "serd/status.h" @@ -11,6 +12,7 @@ #include "serd/syntax.h" #include "serd/world.h" #include "zix/allocator.h" +#include "zix/attributes.h" #include "zix/filesystem.h" #include "zix/path.h" @@ -19,7 +21,6 @@ #endif #include <assert.h> -#include <stdbool.h> #include <stdio.h> #include <string.h> @@ -53,6 +54,64 @@ test_sink(void* handle, const SerdEvent* event) return SERD_SUCCESS; } +ZIX_PURE_FUNC static size_t +prepare_test_read(void* buf, size_t size, size_t nmemb, void* stream) +{ + assert(size == 1); + assert(nmemb == 1); + + (void)buf; + (void)size; + (void)nmemb; + (void)stream; + + return 0; +} + +static int +prepare_test_error(void* stream) +{ + (void)stream; + return 1; +} + +static void +test_prepare_error(const char* const path) +{ + SerdWorld* const world = serd_world_new(); + ReaderTest rt = {0, 0, 0, 0}; + + FILE* const f = fopen(path, "w+b"); + assert(f); + + fprintf(f, "_:s <http://example.org/p> _:o .\n"); + fflush(f); + fseek(f, 0L, SEEK_SET); + + SerdSink* const sink = serd_sink_new(&rt, test_sink, NULL); + assert(sink); + + SerdReader* const reader = serd_reader_new(world, SERD_TURTLE, 0, sink); + assert(reader); + + SerdInputStream in = + serd_open_input_stream(prepare_test_read, prepare_test_error, NULL, f); + + assert(serd_reader_start(reader, &in, NULL, 0) == SERD_BAD_ARG); + + SerdStatus st = serd_reader_start(reader, &in, NULL, 1); + assert(!st); + + assert(serd_reader_read_document(reader) == SERD_BAD_STREAM); + + serd_close_input(&in); + serd_reader_free(reader); + serd_sink_free(sink); + serd_world_free(world); + fclose(f); + assert(!zix_remove(path)); +} + static void test_read_string(void) { @@ -64,19 +123,38 @@ test_read_string(void) SerdReader* reader = serd_reader_new(world, SERD_TURTLE, 0U, sink); assert(reader); - // Test reading a string that ends exactly at the end of input (no newline) - assert( - !serd_reader_start_string(reader, - "<http://example.org/s> <http://example.org/p> " - "<http://example.org/o> .", - NULL)); + static const char* const string1 = + "<http://example.org/s> <http://example.org/p> " + "<http://example.org/o> ."; + + const char* position = string1; + SerdInputStream in = serd_open_input_string(&position); + // Test reading a string that ends exactly at the end of input (no newline) + assert(!serd_reader_start(reader, &in, NULL, 1)); assert(!serd_reader_read_document(reader)); assert(rt.n_base == 0); assert(rt.n_prefix == 0); assert(rt.n_statement == 1); assert(rt.n_end == 0); assert(!serd_reader_finish(reader)); + assert(!serd_close_input(&in)); + + static const char* const string2 = + "<http://example.org/s> <http://example.org/p> " + "<http://example.org/o> , _:blank ."; + + // Test reading a chunk + rt.n_statement = 0; + position = string2; + in = serd_open_input_string(&position); + + assert(!serd_reader_start(reader, &in, NULL, 1)); + assert(!serd_reader_read_chunk(reader)); + assert(rt.n_statement == 2); + assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + assert(!serd_reader_finish(reader)); + assert(!serd_close_input(&in)); serd_reader_free(reader); serd_sink_free(sink); @@ -143,12 +221,15 @@ test_read_eof_by_page(const char* const path) SerdSink* sink = serd_sink_new(&ignored, test_sink, NULL); SerdReader* reader = serd_reader_new(world, SERD_TURTLE, 0U, sink); - serd_reader_start_stream( - reader, (SerdReadFunc)fread, (SerdStreamErrorFunc)ferror, f, NULL, 4096); + SerdInputStream in = + serd_open_input_stream((SerdReadFunc)fread, (SerdErrorFunc)ferror, NULL, f); + assert(serd_reader_start(reader, &in, NULL, 4096) == SERD_SUCCESS); assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + assert(!serd_reader_finish(reader)); + assert(!serd_close_input(&in)); serd_reader_free(reader); serd_sink_free(sink); @@ -166,18 +247,17 @@ test_read_eof_by_byte(void) SerdSink* sink = serd_sink_new(&ignored, test_sink, NULL); SerdReader* reader = serd_reader_new(world, SERD_TURTLE, 0U, sink); - size_t n_reads = 0U; - serd_reader_start_stream(reader, - (SerdReadFunc)eof_test_read, - (SerdStreamErrorFunc)eof_test_error, - &n_reads, - NULL, - 1); + size_t n_reads = 0U; + SerdInputStream in = serd_open_input_stream( + (SerdReadFunc)eof_test_read, (SerdErrorFunc)eof_test_error, NULL, &n_reads); + assert(serd_reader_start(reader, &in, NULL, 1) == SERD_SUCCESS); assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + assert(!serd_reader_finish(reader)); + assert(!serd_close_input(&in)); serd_reader_free(reader); serd_sink_free(sink); @@ -218,8 +298,10 @@ test_read_nquads_chunks(const char* const path) SerdReader* const reader = serd_reader_new(world, SERD_NQUADS, 0U, sink); assert(reader); - SerdStatus st = serd_reader_start_stream( - reader, (SerdReadFunc)fread, (SerdStreamErrorFunc)ferror, f, NULL, 1); + SerdInputStream in = + serd_open_input_stream((SerdReadFunc)fread, (SerdErrorFunc)ferror, NULL, f); + + SerdStatus st = serd_reader_start(reader, &in, NULL, 1); assert(st == SERD_SUCCESS); // Read first statement @@ -264,6 +346,7 @@ test_read_nquads_chunks(const char* const path) assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + assert(!serd_close_input(&in)); serd_reader_free(reader); serd_sink_free(sink); serd_world_free(world); @@ -297,8 +380,10 @@ test_read_turtle_chunks(const char* const path) SerdReader* reader = serd_reader_new(world, SERD_TURTLE, 0U, sink); assert(reader); - SerdStatus st = serd_reader_start_stream( - reader, (SerdReadFunc)fread, (SerdStreamErrorFunc)ferror, f, NULL, 1); + SerdInputStream in = + serd_open_input_stream((SerdReadFunc)fread, (SerdErrorFunc)ferror, NULL, f); + + SerdStatus st = serd_reader_start(reader, &in, NULL, 1); assert(st == SERD_SUCCESS); // Read base @@ -359,6 +444,7 @@ test_read_turtle_chunks(const char* const path) assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + assert(!serd_close_input(&in)); serd_reader_free(reader); serd_sink_free(sink); serd_world_free(world); @@ -366,29 +452,9 @@ test_read_turtle_chunks(const char* const path) assert(!zix_remove(path)); } -static size_t -empty_test_read(void* buf, size_t size, size_t nmemb, void* stream) -{ - (void)buf; - (void)size; - (void)nmemb; - (void)stream; - - assert(false); - - return 0; -} - -static int -empty_test_error(void* stream) -{ - (void)stream; - return 0; -} - /// Test that reading SERD_SYNTAX_EMPTY "succeeds" without reading any input static void -test_read_empty(void) +test_read_empty(const char* const path) { SerdWorld* const world = serd_world_new(); ReaderTest rt = {0, 0, 0, 0}; @@ -399,13 +465,22 @@ test_read_empty(void) SerdReader* const reader = serd_reader_new(world, SERD_SYNTAX_EMPTY, 0, sink); assert(reader); - SerdStatus st = serd_reader_start_stream( - reader, empty_test_read, empty_test_error, &rt, NULL, 1); - assert(st == SERD_SUCCESS); + FILE* const f = fopen(path, "w+b"); + assert(f); + + SerdInputStream in = + serd_open_input_stream((SerdReadFunc)fread, (SerdErrorFunc)ferror, NULL, f); + + SerdStatus st = serd_reader_start(reader, &in, NULL, 1); + assert(!st); assert(serd_reader_read_document(reader) == SERD_SUCCESS); assert(rt.n_statement == 0); + assert(!serd_reader_finish(reader)); + assert(!serd_close_input(&in)); + fclose(f); + assert(!zix_remove(path)); serd_reader_free(reader); serd_sink_free(sink); serd_world_free(world); @@ -422,12 +497,13 @@ main(void) test_read_nquads_chunks(nq_path); test_read_turtle_chunks(ttl_path); + test_prepare_error(ttl_path); test_read_string(); test_read_eof_by_page(ttl_path); test_read_eof_by_byte(); test_read_nquads_chunks(nq_path); test_read_turtle_chunks(ttl_path); - test_read_empty(); + test_read_empty(ttl_path); assert(!zix_remove(dir)); diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c index acd43ad1..1c4a415b 100644 --- a/test/test_reader_writer.c +++ b/test/test_reader_writer.c @@ -6,6 +6,7 @@ #include "serd/buffer.h" #include "serd/env.h" #include "serd/event.h" +#include "serd/input_stream.h" #include "serd/memory.h" #include "serd/node.h" #include "serd/reader.h" @@ -22,7 +23,6 @@ #include <assert.h> #include <errno.h> -#include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <string.h> @@ -116,11 +116,15 @@ test_write_errors(void) const SerdSink* const sink = serd_writer_sink(writer); SerdReader* const reader = serd_reader_new(world, SERD_TRIG, 0U, sink); - SerdStatus st = serd_reader_start_string(reader, doc_string, NULL); + const char* position = doc_string; + SerdInputStream in = serd_open_input_string(&position); + + SerdStatus st = serd_reader_start(reader, &in, NULL, 1); assert(!st); st = serd_reader_read_document(reader); assert(st == SERD_BAD_WRITE); + assert(!serd_close_input(&in)); serd_reader_free(reader); serd_writer_free(writer); serd_env_free(env); @@ -252,8 +256,8 @@ test_reader(const char* path) SerdReader* reader = serd_reader_new(world, SERD_TURTLE, 0U, sink); assert(reader); - assert(serd_reader_read_chunk(reader) == SERD_FAILURE); - assert(serd_reader_read_document(reader) == SERD_FAILURE); + assert(serd_reader_read_chunk(reader) == SERD_BAD_CALL); + assert(serd_reader_read_document(reader) == SERD_BAD_CALL); serd_reader_add_blank_prefix(reader, "tmp"); @@ -266,14 +270,12 @@ test_reader(const char* path) # pragma GCC diagnostic pop #endif - assert(serd_reader_start_file(reader, "http://notafile", false)); - assert(serd_reader_start_file(reader, "file://invalid", false)); - assert(serd_reader_start_file(reader, "file:///nonexistant", false)); - - assert(!serd_reader_start_file(reader, path, true)); + SerdInputStream in = serd_open_input_file(path); + assert(!serd_reader_start(reader, &in, NULL, 4096)); assert(!serd_reader_read_document(reader)); assert(rt.n_statement == 6); assert(!serd_reader_finish(reader)); + serd_close_input(&in); serd_reader_free(reader); serd_sink_free(sink); |