From 55e28966226268a57edb07419ac419ef53ac437d Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sun, 28 Jun 2020 23:26:48 +0200 Subject: Make Reader always read from a ByteSource --- .includes.imp | 2 + include/serd/serd.h | 121 ++++++++++++++++++++++++---------- src/byte_source.c | 165 ++++++++++++++++++++++++++++++++-------------- src/byte_source.h | 61 +++++++---------- src/n3.c | 4 +- src/reader.c | 87 +++++++++--------------- src/reader.h | 6 +- src/serdi.c | 36 ++++++---- src/string.c | 2 + src/world.c | 27 -------- src/world.h | 5 -- test/meson.build | 16 +++-- test/test_byte_source.c | 40 +++++++++++ test/test_free_null.c | 1 + test/test_overflow.c | 6 +- test/test_read_chunk.c | 25 +++---- test/test_reader.c | 109 +++++++++++++++++++++++++----- test/test_reader_writer.c | 13 ++-- test/test_string.c | 2 +- 19 files changed, 460 insertions(+), 268 deletions(-) create mode 100644 test/test_byte_source.c diff --git a/.includes.imp b/.includes.imp index 9ff42384..3829d78c 100644 --- a/.includes.imp +++ b/.includes.imp @@ -7,4 +7,6 @@ { "symbol": [ "uint32_t", "private", "", "public" ] }, { "symbol": [ "uint64_t", "private", "", "public" ] }, { "symbol": [ "uint8_t", "private", "", "public" ] }, + + { "include": [ "", "private", "", "public", ] }, ] diff --git a/include/serd/serd.h b/include/serd/serd.h index 362de28d..82292389 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -206,6 +206,7 @@ typedef enum { SERD_ERR_BAD_TEXT, ///< Invalid text encoding SERD_ERR_BAD_WRITE, ///< Error writing to file/stream SERD_ERR_NO_DATA, ///< Unexpected end of input + SERD_ERR_BAD_CALL, ///< Invalid call } SerdStatus; /** @@ -274,15 +275,6 @@ serd_canonical_path(const char* SERD_NONNULL path); @{ */ -/** - Function to detect I/O stream errors. - - Identical semantics to `ferror`. - - @return Non-zero if `stream` has encountered an error. -*/ -typedef int (*SerdStreamErrorFunc)(void* SERD_NONNULL stream); - /** Source function for raw string input. 
@@ -317,6 +309,24 @@ typedef size_t (*SerdWriteFunc)(const void* SERD_NONNULL buf, size_t nmemb, void* SERD_NONNULL stream); +/** + Function to detect I/O stream errors. + + Identical semantics to `ferror`. + + @return Non-zero if `stream` has encountered an error. +*/ +typedef int (*SerdStreamErrorFunc)(void* SERD_NONNULL stream); + +/** + Function to close an I/O stream. + + Identical semantics to `fclose`. + + @return Zero on success, non-zero if closing `stream` failed. +*/ +typedef int (*SerdStreamCloseFunc)(void* SERD_NONNULL stream); + /** @} @defgroup serd_syntax Syntax Utilities @@ -1471,6 +1481,70 @@ void serd_env_write_prefixes(const SerdEnv* SERD_NONNULL env, const SerdSink* SERD_NONNULL sink); +/** + @} + @defgroup serd_byte_source Byte Source + @{ +*/ + +/// A source for bytes that provides text input +typedef struct SerdByteSourceImpl SerdByteSource; + +/** + Create a new byte source that reads from a string. + + @param string Null-terminated UTF-8 string to read from (not copied). + @param name Optional name of stream for error messages (string or URI). +*/ +SERD_API +SerdByteSource* SERD_ALLOCATED +serd_byte_source_new_string(const char* SERD_NONNULL string, + const SerdNode* SERD_NULLABLE name); + +/** + Create a new byte source that reads from a file. + + An arbitrary `FILE*` can be used via serd_byte_source_new_function() as + well; this is just a convenience function that opens the file properly, sets + flags for optimized I/O if possible, and automatically sets the name of the + source to the file path. + + @param path Path of file to open and read from. + @param page_size Number of bytes to read per call. +*/ +SERD_API +SerdByteSource* SERD_ALLOCATED +serd_byte_source_new_filename(const char* SERD_NONNULL path, size_t page_size); + +/** + Create a new byte source that reads from a user-specified function. + + The `stream` will be passed to the `read_func`, which is compatible with the + standard C `fread` if `stream` is a `FILE*`.
Note that the reader only ever + reads individual bytes at a time, that is, the `size` parameter will always + be 1 (but `nmemb` may be higher). + + @param read_func Stream read function, like `fread`. + @param error_func Stream error function, like `ferror`. + @param close_func Optional stream close function, like `fclose`. + @param stream Context parameter passed to `read_func`, `error_func`, and `close_func`. + @param name Optional name of stream for error messages (string or URI). + @param page_size Number of bytes to read per call. +*/ +SERD_API +SerdByteSource* SERD_ALLOCATED +serd_byte_source_new_function(SerdReadFunc SERD_NONNULL read_func, + SerdStreamErrorFunc SERD_NONNULL error_func, + SerdStreamCloseFunc SERD_NULLABLE close_func, + void* SERD_NULLABLE stream, + const SerdNode* SERD_NULLABLE name, + size_t page_size); + +/// Free `source` +SERD_API +void +serd_byte_source_free(SerdByteSource* SERD_NULLABLE source); + /** @} @defgroup serd_reader Reader @@ -1511,34 +1585,11 @@ void serd_reader_add_blank_prefix(SerdReader* SERD_NONNULL reader, const char* SERD_NULLABLE prefix); -/// Prepare to read from the file at a local file `uri` -SERD_API -SerdStatus -serd_reader_start_file(SerdReader* SERD_NONNULL reader, - const char* SERD_NONNULL uri, - bool bulk); - -/** - Prepare to read from a stream. - - The `read_func` is guaranteed to only be called for `page_size` elements - with size 1 (i.e. `page_size` bytes).
-*/ -SERD_API -SerdStatus -serd_reader_start_stream(SerdReader* SERD_NONNULL reader, - SerdReadFunc SERD_NONNULL read_func, - SerdStreamErrorFunc SERD_NONNULL error_func, - void* SERD_NONNULL stream, - const SerdNode* SERD_NULLABLE name, - size_t page_size); - -/// Prepare to read from a string +/// Prepare to read from a byte source SERD_API SerdStatus -serd_reader_start_string(SerdReader* SERD_NONNULL reader, - const char* SERD_NONNULL utf8, - const SerdNode* SERD_NULLABLE name); +serd_reader_start(SerdReader* SERD_NONNULL reader, + SerdByteSource* SERD_NONNULL byte_source); /** Read a single "chunk" of data during an incremental read. diff --git a/src/byte_source.c b/src/byte_source.c index 2e4f66cb..ef6bf3bb 100644 --- a/src/byte_source.c +++ b/src/byte_source.c @@ -17,12 +17,21 @@ #include "byte_source.h" #include "caret.h" +#include "serd_config.h" #include "system.h" #include "serd/serd.h" +#include + +#if USE_POSIX_FADVISE && USE_FILENO +# include +#endif + #include #include +#include +#include #include SerdStatus @@ -50,28 +59,37 @@ serd_byte_source_page(SerdByteSource* const source) return SERD_SUCCESS; } -SerdStatus -serd_byte_source_open_source(SerdByteSource* const source, - const SerdReadFunc read_func, - const SerdStreamErrorFunc error_func, - const SerdStreamCloseFunc close_func, - void* const stream, - const SerdNode* const name, - const size_t page_size) +SerdByteSource* +serd_byte_source_new_function(const SerdReadFunc read_func, + const SerdStreamErrorFunc error_func, + const SerdStreamCloseFunc close_func, + void* const stream, + const SerdNode* const name, + const size_t page_size) { - assert(page_size > 0); - memset(source, '\0', sizeof(*source)); - source->read_func = read_func; - source->error_func = error_func; - source->close_func = close_func; - source->stream = stream; - source->page_size = page_size; - source->buf_size = page_size; - source->name = serd_node_copy(name); - source->caret.file = source->name; - source->caret.line = 1u; -
source->caret.col = 1u; - source->from_stream = true; + if (!page_size) { + return NULL; + } + + SerdByteSource* source = (SerdByteSource*)calloc(1, sizeof(SerdByteSource)); + if (!source) { + return NULL; // Out of memory; the caller still owns `stream` + } + + source->read_func = read_func; + source->error_func = error_func; + source->close_func = close_func; + source->stream = stream; + source->page_size = page_size; + source->buf_size = page_size; + source->type = FROM_FUNCTION; + + source->name = + name ? serd_node_copy(name) : serd_new_string(SERD_STRING("func")); + + source->caret.file = source->name; + source->caret.line = 1u; + source->caret.col = 1u; if (page_size > 1) { source->file_buf = (uint8_t*)serd_allocate_buffer(page_size); @@ -81,58 +99,119 @@ serd_byte_source_open_source(SerdByteSource* const source, source->read_buf = &source->read_byte; } - return SERD_SUCCESS; + return source; } -SerdStatus -serd_byte_source_prepare(SerdByteSource* const source) +static bool +is_directory(const char* const path) { - if (source->page_size == 0) { - return SERD_FAILURE; +#ifdef _MSC_VER + struct stat st; + return !stat(path, &st) && (st.st_mode & _S_IFDIR); +#else + struct stat st; + return !stat(path, &st) && S_ISDIR(st.st_mode); +#endif +} + +SerdByteSource* +serd_byte_source_new_filename(const char* const path, const size_t page_size) +{ + if (page_size == 0 || is_directory(path)) { + return NULL; } - source->prepared = true; + FILE* const fd = fopen(path, "rb"); + if (!fd) { + return NULL; + } - if (source->from_stream) { - return (source->page_size > 1 ?
serd_byte_source_page(source) - : serd_byte_source_advance(source)); + SerdByteSource* source = (SerdByteSource*)calloc(1, sizeof(SerdByteSource)); + if (!source) { + fclose(fd); // Out of memory; do not leak the file opened above + return NULL; + } + + source->read_func = (SerdReadFunc)fread; + source->error_func = (SerdStreamErrorFunc)ferror; + source->close_func = (SerdStreamCloseFunc)fclose; + source->stream = fd; + source->page_size = page_size; + source->buf_size = page_size; + + source->name = serd_new_file_uri(SERD_STRING(path), SERD_EMPTY_STRING()); + source->type = FROM_FILENAME; + + source->caret.file = source->name; + source->caret.line = 1u; + source->caret.col = 1u; + + if (page_size > 1) { + source->file_buf = (uint8_t*)serd_allocate_buffer(page_size); + source->read_buf = source->file_buf; + memset(source->file_buf, '\0', page_size); + } else { + source->read_buf = &source->read_byte; } - return SERD_SUCCESS; +#if USE_POSIX_FADVISE && USE_FILENO + posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + + return source; } -SerdStatus -serd_byte_source_open_string(SerdByteSource* const source, - const char* const utf8, - const SerdNode* const name) +SerdByteSource* +serd_byte_source_new_string(const char* const string, + const SerdNode* const name) { - memset(source, '\0', sizeof(*source)); + SerdByteSource* source = (SerdByteSource*)calloc(1, sizeof(SerdByteSource)); + if (!source) { + return NULL; // Out of memory + } source->page_size = 1; + source->read_buf = (const uint8_t*)string; + source->type = FROM_STRING; + source->name = name ? serd_node_copy(name) : serd_new_string(SERD_STRING("string")); - source->read_buf = (const uint8_t*)utf8; source->caret.file = source->name; source->caret.line = 1u; source->caret.col = 1u; - return SERD_SUCCESS; + return source; } SerdStatus -serd_byte_source_close(SerdByteSource* const source) +serd_byte_source_prepare(SerdByteSource* const source) { - SerdStatus st = SERD_SUCCESS; - if (source->close_func) { - st = source->close_func(source->stream) ?
SERD_ERR_UNKNOWN : SERD_SUCCESS; - } + source->prepared = true; + if (source->type != FROM_STRING) { + if (source->page_size > 1) { + return serd_byte_source_page(source); + } - if (source->page_size > 1) { - serd_free_aligned(source->file_buf); + return serd_byte_source_advance(source); } - serd_node_free(source->name); - memset(source, '\0', sizeof(*source)); - return st; + return SERD_SUCCESS; +} + +void +serd_byte_source_free(SerdByteSource* const source) +{ + if (source) { + if (source->close_func) { + source->close_func(source->stream); + } + + if (source->page_size > 1) { + serd_free_aligned(source->file_buf); + } + + serd_node_free(source->name); + free(source); + } } diff --git a/src/byte_source.h b/src/byte_source.h index 1c9fbd1f..d054e156 100644 --- a/src/byte_source.h +++ b/src/byte_source.h @@ -26,42 +26,29 @@ #include #include -typedef int (*SerdStreamCloseFunc)(void*); - -typedef struct { - SerdReadFunc read_func; ///< Read function (e.g. fread) - SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror) - SerdStreamCloseFunc close_func; ///< Function for closing stream - void* stream; ///< Stream (e.g. 
FILE) - size_t page_size; ///< Number of bytes to read at a time - size_t buf_size; ///< Number of bytes in file_buf - SerdNode* name; ///< Name of stream (referenced by cur) - SerdCaret caret; ///< Caret for error reporting - uint8_t* file_buf; ///< Buffer iff reading pages from a file - const uint8_t* read_buf; ///< Pointer to file_buf or read_byte - size_t read_head; ///< Offset into read_buf - uint8_t read_byte; ///< 1-byte 'buffer' used when not paging - bool from_stream; ///< True iff reading from `stream` - bool prepared; ///< True iff prepared for reading - bool eof; ///< True iff end of file reached -} SerdByteSource; - -SerdStatus -serd_byte_source_open_string(SerdByteSource* source, - const char* utf8, - const SerdNode* name); - -SerdStatus -serd_byte_source_open_source(SerdByteSource* source, - SerdReadFunc read_func, - SerdStreamErrorFunc error_func, - SerdStreamCloseFunc close_func, - void* stream, - const SerdNode* name, - size_t page_size); - -SerdStatus -serd_byte_source_close(SerdByteSource* source); +typedef enum { + FROM_STRING, ///< Reading from a user-provided buffer + FROM_FILENAME, ///< Reading from a file we opened + FROM_FUNCTION, ///< Reading from a user-provided function +} SerdByteSourceType; + +struct SerdByteSourceImpl { + SerdReadFunc read_func; ///< Read function (e.g. fread) + SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror) + SerdStreamCloseFunc close_func; ///< Function for closing stream + void* stream; ///< Stream (e.g. 
FILE) + size_t page_size; ///< Number of bytes to read at a time + size_t buf_size; ///< Number of bytes in file_buf + SerdNode* name; ///< Name of stream (referenced by cur) + SerdCaret caret; ///< File position for error reporting + uint8_t* file_buf; ///< Buffer iff reading pages from a file + const uint8_t* read_buf; ///< Pointer to file_buf or read_byte + size_t read_head; ///< Offset into read_buf + SerdByteSourceType type; ///< Type of input + uint8_t read_byte; ///< 1-byte 'buffer' used when not paging + bool prepared; ///< True iff prepared for reading + bool eof; ///< True iff end of file reached +}; SerdStatus serd_byte_source_prepare(SerdByteSource* source); @@ -92,7 +79,7 @@ serd_byte_source_advance(SerdByteSource* source) ++source->caret.col; } - if (source->from_stream) { + if (source->type != FROM_STRING) { if (++source->read_head >= source->buf_size) { st = serd_byte_source_page(source); } diff --git a/src/n3.c b/src/n3.c index 46fe281b..a6aa71ba 100644 --- a/src/n3.c +++ b/src/n3.c @@ -1678,7 +1678,7 @@ skip_until(SerdReader* const reader, const uint8_t byte) SerdStatus read_turtleTrigDoc(SerdReader* const reader) { - while (!reader->source.eof) { + while (!reader->source->eof) { const size_t orig_stack_size = reader->stack.size; const SerdStatus st = read_n3_statement(reader); if (st > SERD_FAILURE) { @@ -1699,7 +1699,7 @@ SerdStatus read_nquadsDoc(SerdReader* const reader) { SerdStatus st = SERD_SUCCESS; - while (!st && !reader->source.eof) { + while (!st && !reader->source->eof) { const size_t orig_stack_size = reader->stack.size; SerdStatementFlags flags = 0; diff --git a/src/reader.c b/src/reader.c index ed6caafd..fe88ee14 100644 --- a/src/reader.c +++ b/src/reader.c @@ -37,7 +37,7 @@ r_err(SerdReader* const reader, const SerdStatus st, const char* const fmt, ...) 
{ va_list args; va_start(args, fmt); - const SerdError e = {st, &reader->source.caret, fmt, &args}; + const SerdError e = {st, &reader->source->caret, fmt, &args}; serd_world_error(reader->world, &e); va_end(args); return st; @@ -69,7 +69,8 @@ tolerate_status(const SerdReader* const reader, const SerdStatus status) } if (status == SERD_ERR_INTERNAL || status == SERD_ERR_OVERFLOW || - status == SERD_ERR_BAD_WRITE || status == SERD_ERR_NO_DATA) { + status == SERD_ERR_BAD_WRITE || status == SERD_ERR_NO_DATA || + status == SERD_ERR_BAD_CALL) { return false; } @@ -145,7 +146,7 @@ emit_statement(SerdReader* const reader, serd_node_zero_pad(o); const SerdStatement statement = {{ctx.subject, ctx.predicate, o, ctx.graph}, - &reader->source.caret}; + &reader->source->caret}; const SerdStatus st = serd_sink_write_statement(reader->sink, *ctx.flags, &statement); @@ -163,7 +164,11 @@ read_statement(SerdReader* const reader) SerdStatus serd_reader_read_document(SerdReader* const reader) { - if (!reader->source.prepared) { + if (!reader->source) { + return SERD_ERR_BAD_CALL; + } + + if (!reader->source->prepared) { SerdStatus st = serd_reader_prepare(reader); if (st) { return st; @@ -241,12 +246,12 @@ serd_reader_add_blank_prefix(SerdReader* const reader, const char* const prefix) static SerdStatus skip_bom(SerdReader* const me) { - if (serd_byte_source_peek(&me->source) == 0xEF) { - serd_byte_source_advance(&me->source); - if (serd_byte_source_peek(&me->source) != 0xBB || - serd_byte_source_advance(&me->source) || - serd_byte_source_peek(&me->source) != 0xBF || - serd_byte_source_advance(&me->source)) { + if (serd_byte_source_peek(me->source) == 0xEF) { + serd_byte_source_advance(me->source); + if (serd_byte_source_peek(me->source) != 0xBB || + serd_byte_source_advance(me->source) || + serd_byte_source_peek(me->source) != 0xBF || + serd_byte_source_advance(me->source)) { r_err(me, SERD_ERR_BAD_SYNTAX, "corrupt byte order mark\n"); return SERD_ERR_BAD_SYNTAX; } @@ -256,60 +261,23 
@@ skip_bom(SerdReader* const me) } SerdStatus -serd_reader_start_stream(SerdReader* const reader, - const SerdReadFunc read_func, - const SerdStreamErrorFunc error_func, - void* const stream, - const SerdNode* const name, - const size_t page_size) +serd_reader_start(SerdReader* const reader, SerdByteSource* const byte_source) { - return serd_byte_source_open_source( - &reader->source, read_func, error_func, NULL, stream, name, page_size); -} - -SerdStatus -serd_reader_start_file(SerdReader* reader, const char* uri, bool bulk) -{ - char* const path = serd_parse_file_uri(uri, NULL); - if (!path) { - return SERD_ERR_BAD_ARG; - } - - FILE* fd = serd_world_fopen(reader->world, path, "rb"); - free(path); - if (!fd) { - return SERD_ERR_UNKNOWN; - } + serd_reader_finish(reader); - SerdNode* const name = serd_new_uri(SERD_STRING(uri)); - const SerdStatus st = serd_byte_source_open_source( - &reader->source, - bulk ? (SerdReadFunc)fread : serd_file_read_byte, - (SerdStreamErrorFunc)ferror, - (SerdStreamCloseFunc)fclose, - fd, - name, - bulk ? SERD_PAGE_SIZE : 1u); - serd_node_free(name); - return st; -} + reader->source = byte_source; -SerdStatus -serd_reader_start_string(SerdReader* const reader, - const char* const utf8, - const SerdNode* const name) -{ - return serd_byte_source_open_string(&reader->source, utf8, name); + return reader->source ? 
SERD_SUCCESS : SERD_ERR_BAD_ARG; } static SerdStatus serd_reader_prepare(SerdReader* const reader) { - SerdStatus st = serd_byte_source_prepare(&reader->source); + SerdStatus st = serd_byte_source_prepare(reader->source); if (st == SERD_SUCCESS) { st = skip_bom(reader); } else if (st == SERD_FAILURE) { - reader->source.eof = true; + reader->source->eof = true; } return st; } @@ -318,10 +286,14 @@ SerdStatus serd_reader_read_chunk(SerdReader* const reader) { SerdStatus st = SERD_SUCCESS; - if (!reader->source.prepared) { + if (!reader->source) { + return SERD_ERR_BAD_CALL; + } + + if (!reader->source->prepared) { st = serd_reader_prepare(reader); - } else if (reader->source.eof) { - st = serd_byte_source_advance(&reader->source); + } else if (reader->source->eof) { + st = serd_byte_source_advance(reader->source); } return st ? st : read_statement(reader); @@ -330,5 +302,6 @@ serd_reader_read_chunk(SerdReader* const reader) SerdStatus serd_reader_finish(SerdReader* const reader) { - return serd_byte_source_close(&reader->source); + reader->source = NULL; + return SERD_SUCCESS; } diff --git a/src/reader.h b/src/reader.h index a8c154dd..76f46506 100644 --- a/src/reader.h +++ b/src/reader.h @@ -50,7 +50,7 @@ struct SerdReaderImpl { SerdNode* rdf_first; SerdNode* rdf_rest; SerdNode* rdf_nil; - SerdByteSource source; + SerdByteSource* source; SerdStack stack; SerdSyntax syntax; unsigned next_id; @@ -107,7 +107,7 @@ read_turtleTrigDoc(SerdReader* reader); static inline int peek_byte(SerdReader* reader) { - SerdByteSource* source = &reader->source; + SerdByteSource* source = reader->source; return source->eof ? 
EOF : (int)source->read_buf[source->read_head]; } @@ -120,7 +120,7 @@ eat_byte_safe(SerdReader* reader, const int byte) const int c = peek_byte(reader); assert(c == byte); - serd_byte_source_advance(&reader->source); + serd_byte_source_advance(reader->source); return c; } diff --git a/src/serdi.c b/src/serdi.c index 2e04ae5a..3025b494 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -27,6 +27,7 @@ # include #endif +#include #include #include #include @@ -104,25 +105,36 @@ read_file(SerdWorld* const world, syntax = syntax ? syntax : serd_guess_syntax(filename); syntax = syntax ? syntax : SERD_TRIG; - SerdStatus st = SERD_SUCCESS; - SerdReader* reader = serd_reader_new(world, syntax, flags, sink, stack_size); - - serd_reader_add_blank_prefix(reader, add_prefix); - + SerdByteSource* byte_source = NULL; if (!strcmp(filename, "-")) { SerdNode* name = serd_new_string(SERD_STRING("stdin")); - st = serd_reader_start_stream( - reader, serd_file_read_byte, (SerdStreamErrorFunc)ferror, stdin, name, 1); + byte_source = serd_byte_source_new_function( + serd_file_read_byte, (SerdStreamErrorFunc)ferror, NULL, stdin, name, 1); serd_node_free(name); } else { - st = serd_reader_start_file(reader, filename, bulk_read); + byte_source = + serd_byte_source_new_filename(filename, bulk_read ? SERD_PAGE_SIZE : 1u); + } + + if (!byte_source) { + SERDI_ERRORF( + "failed to open input file `%s' (%s)\n", filename, strerror(errno)); + + return SERD_ERR_UNKNOWN; } + SerdReader* reader = serd_reader_new(world, syntax, flags, sink, stack_size); + + serd_reader_add_blank_prefix(reader, add_prefix); + + SerdStatus st = serd_reader_start(reader, byte_source); + st = st ? 
st : serd_reader_read_document(reader); serd_reader_free(reader); + serd_byte_source_free(byte_source); return st; } @@ -332,6 +344,9 @@ main(int argc, char** argv) SerdStatus st = SERD_SUCCESS; SerdNode* input_name = NULL; if (input_string) { + SerdByteSource* const byte_source = + serd_byte_source_new_string(input_string, NULL); + SerdReader* const reader = serd_reader_new(world, input_syntax ? input_syntax : SERD_TRIG, @@ -341,13 +356,12 @@ main(int argc, char** argv) serd_reader_add_blank_prefix(reader, add_prefix); - SerdNode* name = serd_new_string(SERD_STRING("string")); - if (!(st = serd_reader_start_string(reader, input_string, name))) { + if (!(st = serd_reader_start(reader, byte_source))) { st = serd_reader_read_document(reader); } - serd_node_free(name); serd_reader_free(reader); + serd_byte_source_free(byte_source); } size_t prefix_len = 0; diff --git a/src/string.c b/src/string.c index 097e73ef..13fb9263 100644 --- a/src/string.c +++ b/src/string.c @@ -58,6 +58,8 @@ serd_strerror(const SerdStatus status) return "Error writing to file"; case SERD_ERR_NO_DATA: return "Unexpected end of input"; + case SERD_ERR_BAD_CALL: + return "Invalid call"; } return "Unknown error"; diff --git a/src/world.c b/src/world.c index e0ce9201..bd70d615 100644 --- a/src/world.c +++ b/src/world.c @@ -19,14 +19,7 @@ #include "caret.h" #include "namespaces.h" #include "node.h" -#include "serd_config.h" -#include "system.h" -#if defined(USE_POSIX_FADVISE) -# include -#endif - -#include #include #include #include @@ -34,26 +27,6 @@ #define BLANK_CHARS 12 -FILE* -serd_world_fopen(SerdWorld* world, const char* path, const char* mode) -{ - FILE* fd = fopen(path, mode); - if (!fd) { - char message[1024] = {0}; - serd_system_strerror(errno, message, sizeof(message)); - - serd_world_errorf( - world, SERD_ERR_INTERNAL, "failed to open file %s (%s)\n", path, message); - return NULL; - } - -#if USE_POSIX_FADVISE && USE_FILENO - posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL); 
-#endif - - return fd; -} - SerdStatus serd_world_error(const SerdWorld* const world, const SerdError* const e) { diff --git a/src/world.h b/src/world.h index 44e21166..a70a6e28 100644 --- a/src/world.h +++ b/src/world.h @@ -20,7 +20,6 @@ #include "serd/serd.h" #include -#include struct SerdWorldImpl { SerdNodes* nodes; @@ -37,10 +36,6 @@ struct SerdWorldImpl { uint32_t next_blank_id; }; -/// Open a file configured for fast sequential reading -FILE* -serd_world_fopen(SerdWorld* world, const char* path, const char* mode); - SerdStatus serd_world_error(const SerdWorld* world, const SerdError* e); diff --git a/test/meson.build b/test/meson.build index b6c2ce2f..5063277b 100644 --- a/test/meson.build +++ b/test/meson.build @@ -6,6 +6,7 @@ wrapper = meson.get_cross_property('exe_wrapper', '') unit_tests = [ 'byte_sink', + 'byte_source', 'caret', 'env', 'free_null', @@ -160,25 +161,32 @@ if get_option('utils') # IO errors test('read_dir', serdi, - args: ['-e', 'file://@0@/'.format(meson.source_root())], + args: ['-e', meson.source_root()], env: test_env, should_fail: true, suite: 'io_errors') test('bulk_read_dir', serdi, - args: ['file://@0@/'.format(meson.source_root())], + args: [meson.source_root()], env: test_env, should_fail: true, suite: 'io_errors') + if host_machine.system() == 'linux' + test('unreadable', serdi, + args: ['/sys/bus/pci/rescan'], + env: test_env, + should_fail: true, + suite: 'io_errors') + endif + test('write_error', files('test_write_error.py'), args: script_args + [serd_ttl], env: test_env, suite: 'io_errors') test('write_bad_file', serdi, - args: ['-w', '/does/not/exist.ttl', - 'file://@0@/serd.ttl'.format(meson.source_root())], + args: ['-w', '/does/not/exist.ttl', meson.source_root() / 'serd.ttl'], env: test_env, should_fail: true, suite: 'io_errors') diff --git a/test/test_byte_source.c b/test/test_byte_source.c new file mode 100644 index 00000000..14ef5819 --- /dev/null +++ b/test/test_byte_source.c @@ -0,0 +1,40 @@ +/* + Copyright 2021 
David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#undef NDEBUG + +#include "serd/serd.h" + +#include +#include +#include + +static void +test_bad_page_size(void) +{ + assert(!serd_byte_source_new_filename("file.ttl", 0)); + + assert(!serd_byte_source_new_function( + (SerdReadFunc)fread, (SerdStreamErrorFunc)ferror, NULL, NULL, NULL, 0)); +} + +int +main(void) +{ + test_bad_page_size(); + + return 0; +} diff --git a/test/test_free_null.c b/test/test_free_null.c index 15e751a9..28f7563f 100644 --- a/test/test_free_null.c +++ b/test/test_free_null.c @@ -24,6 +24,7 @@ int main(void) { serd_free(NULL); + serd_byte_source_free(NULL); serd_byte_sink_free(NULL); serd_node_free(NULL); serd_world_free(NULL); diff --git a/test/test_overflow.c b/test/test_overflow.c index 936187a9..6d5c6d0c 100644 --- a/test/test_overflow.c +++ b/test/test_overflow.c @@ -30,15 +30,17 @@ test_size(SerdWorld* const world, const SerdSyntax syntax, const size_t stack_size) { - SerdSink* sink = serd_sink_new(NULL, NULL, NULL); + SerdSink* sink = serd_sink_new(NULL, NULL, NULL); + SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL); SerdReader* const reader = serd_reader_new(world, syntax, 0u, sink, stack_size); assert(reader); - serd_reader_start_string(reader, str, NULL); + serd_reader_start(reader, 
byte_source); const SerdStatus st = serd_reader_read_document(reader); serd_reader_free(reader); + serd_byte_source_free(byte_source); serd_sink_free(sink); return st; diff --git a/test/test_read_chunk.c b/test/test_read_chunk.c index 78cd402e..f2681e32 100644 --- a/test/test_read_chunk.c +++ b/test/test_read_chunk.c @@ -94,20 +94,22 @@ main(void) SerdWorld* world = serd_world_new(); SerdSink* sink = serd_sink_new(NULL, on_event, NULL); + SerdByteSource* byte_source = + serd_byte_source_new_string("@prefix eg: .\n" + "@base .\n" + "eg:s1 eg:p1 eg:o1 ;\n" + " eg:p2 eg:o2 ,\n" + " eg:o3 .\n" + "eg:s2 eg:p1 eg:o1 ;\n" + " eg:p2 eg:o2 .\n" + "eg:s3 eg:p1 eg:o1 .\n" + "eg:s4 eg:p1 [ eg:p3 eg:o1 ] .\n", + NULL); + SerdReader* reader = serd_reader_new(world, SERD_TURTLE, 0, sink, 4096); assert(reader); - assert(!serd_reader_start_string(reader, - "@prefix eg: .\n" - "@base .\n" - "eg:s1 eg:p1 eg:o1 ;\n" - " eg:p2 eg:o2 ,\n" - " eg:o3 .\n" - "eg:s2 eg:p1 eg:o1 ;\n" - " eg:p2 eg:o2 .\n" - "eg:s3 eg:p1 eg:o1 .\n" - "eg:s4 eg:p1 [ eg:p3 eg:o1 ] .\n", - NULL)); + assert(!serd_reader_start(reader, byte_source)); assert(!serd_reader_read_chunk(reader) && n_prefix == 1); assert(!serd_reader_read_chunk(reader) && n_base == 1); @@ -121,6 +123,7 @@ main(void) assert(!serd_reader_finish(reader)); serd_reader_free(reader); + serd_byte_source_free(byte_source); serd_sink_free(sink); serd_world_free(world); diff --git a/test/test_reader.c b/test/test_reader.c index 532411c6..7bc8ab48 100644 --- a/test/test_reader.c +++ b/test/test_reader.c @@ -32,6 +32,55 @@ count_statements(void* handle, const SerdEvent* event) return SERD_SUCCESS; } +SERD_PURE_FUNC +static size_t +prepare_test_read(void* buf, size_t size, size_t nmemb, void* stream) +{ + assert(size == 1); + assert(nmemb == 1); + + (void)buf; + (void)size; + (void)nmemb; + (void)stream; + + return 0; +} + +static int +prepare_test_error(void* stream) +{ + (void)stream; + return 1; +} + +static void +test_prepare_error(void) +{ + 
SerdWorld* const world = serd_world_new(); + size_t n_statements = 0; + FILE* const f = tmpfile(); + + SerdSink* const sink = serd_sink_new(&n_statements, count_statements, NULL); + assert(sink); + + SerdReader* const reader = serd_reader_new(world, SERD_TURTLE, 0, sink, 4096); + assert(reader); + + SerdByteSource* byte_source = serd_byte_source_new_function( + prepare_test_read, prepare_test_error, NULL, f, NULL, 1); + + SerdStatus st = serd_reader_start(reader, byte_source); + assert(!st); + + assert(serd_reader_read_document(reader) == SERD_ERR_UNKNOWN); + + serd_byte_source_free(byte_source); + serd_reader_free(reader); + serd_sink_free(sink); + serd_world_free(world); +} + static void test_read_string(void) { @@ -44,18 +93,33 @@ test_read_string(void) SerdReader* reader = serd_reader_new(world, SERD_TURTLE, 0u, sink, 4096); assert(reader); - // Test reading a string that ends exactly at the end of input (no newline) - assert( - !serd_reader_start_string(reader, - " " - " .", - NULL)); + SerdByteSource* byte_source = + serd_byte_source_new_string(" " + " .", + NULL); + // Test reading a string that ends exactly at the end of input (no newline) + assert(!serd_reader_start(reader, byte_source)); assert(!serd_reader_read_document(reader)); assert(n_statements == 1); assert(!serd_reader_finish(reader)); + // Test reading the same but as a chunk + serd_byte_source_free(byte_source); + n_statements = 0; + byte_source = + serd_byte_source_new_string(" " + " , _:blank .", + NULL); + + assert(!serd_reader_start(reader, byte_source)); + assert(!serd_reader_read_chunk(reader)); + assert(n_statements == 2); + assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + assert(!serd_reader_finish(reader)); + serd_reader_free(reader); + serd_byte_source_free(byte_source); serd_sink_free(sink); serd_world_free(world); } @@ -120,13 +184,16 @@ test_read_eof_by_page(void) SerdSink* sink = serd_sink_new(&ignored, count_statements, NULL); SerdReader* reader = serd_reader_new(world, 
SERD_TURTLE, 0u, sink, 4096); - serd_reader_start_stream( - reader, (SerdReadFunc)fread, (SerdStreamErrorFunc)ferror, temp, NULL, 4096); + SerdByteSource* byte_source = serd_byte_source_new_function( + (SerdReadFunc)fread, (SerdStreamErrorFunc)ferror, NULL, temp, NULL, 4096); + assert(serd_reader_start(reader, byte_source) == SERD_SUCCESS); assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + assert(!serd_reader_finish(reader)); + serd_byte_source_free(byte_source); serd_reader_free(reader); serd_sink_free(sink); serd_world_free(world); @@ -142,19 +209,23 @@ test_read_eof_by_byte(void) SerdSink* sink = serd_sink_new(&ignored, count_statements, NULL); SerdReader* reader = serd_reader_new(world, SERD_TURTLE, 0u, sink, 4096); - size_t n_reads = 0u; - serd_reader_start_stream(reader, - (SerdReadFunc)eof_test_read, - (SerdStreamErrorFunc)eof_test_error, - &n_reads, - NULL, - 1); + size_t n_reads = 0u; + SerdByteSource* byte_source = + serd_byte_source_new_function((SerdReadFunc)eof_test_read, + (SerdStreamErrorFunc)eof_test_error, + NULL, + &n_reads, + NULL, + 1); + assert(serd_reader_start(reader, byte_source) == SERD_SUCCESS); assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + assert(!serd_reader_finish(reader)); + serd_byte_source_free(byte_source); serd_reader_free(reader); serd_sink_free(sink); serd_world_free(world); @@ -175,8 +246,10 @@ test_read_chunks(void) serd_reader_new(world, SERD_TURTLE, 0u, sink, 4096); assert(reader); - SerdStatus st = serd_reader_start_stream( - reader, (SerdReadFunc)fread, (SerdStreamErrorFunc)ferror, f, NULL, 1); + SerdByteSource* byte_source = serd_byte_source_new_function( + (SerdReadFunc)fread, (SerdStreamErrorFunc)ferror, NULL, f, NULL, 1); + + 
SerdStatus st = serd_reader_start(reader, byte_source); assert(st == SERD_SUCCESS); // Write two statement separated by null characters @@ -217,6 +290,7 @@ test_read_chunks(void) assert(st == SERD_FAILURE); assert(n_statements == 2); + serd_byte_source_free(byte_source); serd_reader_free(reader); serd_sink_free(sink); fclose(f); @@ -226,6 +300,7 @@ test_read_chunks(void) int main(void) { + test_prepare_error(); test_read_string(); test_read_eof_by_page(); test_read_eof_by_byte(); diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c index 8671a649..3a9bf7c9 100644 --- a/test/test_reader_writer.c +++ b/test/test_reader_writer.c @@ -19,7 +19,6 @@ #include "serd/serd.h" #include -#include #include #include #include @@ -149,8 +148,8 @@ test_reader(const char* path) SerdReader* reader = serd_reader_new(world, SERD_TURTLE, 0u, sink, 4096); assert(reader); - assert(serd_reader_read_document(reader) == SERD_FAILURE); - assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + assert(serd_reader_read_document(reader) == SERD_ERR_BAD_CALL); + assert(serd_reader_read_chunk(reader) == SERD_ERR_BAD_CALL); serd_reader_add_blank_prefix(reader, "tmp"); @@ -163,14 +162,12 @@ test_reader(const char* path) # pragma GCC diagnostic pop #endif - assert(serd_reader_start_file(reader, "http://notafile", false)); - assert(serd_reader_start_file(reader, "file://invalid", false)); - assert(serd_reader_start_file(reader, "file:///nonexistant", false)); - - assert(!serd_reader_start_file(reader, path, true)); + SerdByteSource* byte_source = serd_byte_source_new_filename(path, 4096); + assert(!serd_reader_start(reader, byte_source)); assert(!serd_reader_read_document(reader)); assert(n_statements == 6); serd_reader_finish(reader); + serd_byte_source_free(byte_source); serd_reader_free(reader); serd_sink_free(sink); diff --git a/test/test_string.c b/test/test_string.c index 5bc304d1..495138d8 100644 --- a/test/test_string.c +++ b/test/test_string.c @@ -44,7 +44,7 @@ 
test_strerror(void) { const char* msg = serd_strerror(SERD_SUCCESS); assert(!strcmp(msg, "Success")); - for (int i = SERD_FAILURE; i <= SERD_ERR_NO_DATA; ++i) { + for (int i = SERD_FAILURE; i <= SERD_ERR_BAD_CALL; ++i) { msg = serd_strerror((SerdStatus)i); assert(strcmp(msg, "Success")); } -- cgit v1.2.1