about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2020-06-28 23:26:48 +0200
committer David Robillard <d@drobilla.net> 2022-01-13 23:05:24 -0500
commit 55e28966226268a57edb07419ac419ef53ac437d (patch)
tree 317a3e50480f97a415dae5a7d096726a0c730c0a
parent b98bd7a32cf4302e0a210dd8558edd3ab2088525 (diff)
download serd-55e28966226268a57edb07419ac419ef53ac437d.tar.gz
serd-55e28966226268a57edb07419ac419ef53ac437d.tar.bz2
serd-55e28966226268a57edb07419ac419ef53ac437d.zip
Make Reader always read from a ByteSource
-rw-r--r-- .includes.imp 2
-rw-r--r-- include/serd/serd.h 121
-rw-r--r-- src/byte_source.c 165
-rw-r--r-- src/byte_source.h 61
-rw-r--r-- src/n3.c 4
-rw-r--r-- src/reader.c 87
-rw-r--r-- src/reader.h 6
-rw-r--r-- src/serdi.c 36
-rw-r--r-- src/string.c 2
-rw-r--r-- src/world.c 27
-rw-r--r-- src/world.h 5
-rw-r--r-- test/meson.build 16
-rw-r--r-- test/test_byte_source.c 40
-rw-r--r-- test/test_free_null.c 1
-rw-r--r-- test/test_overflow.c 6
-rw-r--r-- test/test_read_chunk.c 25
-rw-r--r-- test/test_reader.c 109
-rw-r--r-- test/test_reader_writer.c 13
-rw-r--r-- test/test_string.c 2
19 files changed, 460 insertions, 268 deletions
diff --git a/.includes.imp b/.includes.imp
index 9ff42384..3829d78c 100644
--- a/.includes.imp
+++ b/.includes.imp
@@ -7,4 +7,6 @@
{ "symbol": [ "uint32_t", "private", "<stdint.h>", "public" ] },
{ "symbol": [ "uint64_t", "private", "<stdint.h>", "public" ] },
{ "symbol": [ "uint8_t", "private", "<stdint.h>", "public" ] },
+
+ { "include": [ "<bits/struct_stat.h>", "private", "<sys/stat.h>", "public", ] },
]
diff --git a/include/serd/serd.h b/include/serd/serd.h
index 362de28d..82292389 100644
--- a/include/serd/serd.h
+++ b/include/serd/serd.h
@@ -206,6 +206,7 @@ typedef enum {
SERD_ERR_BAD_TEXT, ///< Invalid text encoding
SERD_ERR_BAD_WRITE, ///< Error writing to file/stream
SERD_ERR_NO_DATA, ///< Unexpected end of input
+ SERD_ERR_BAD_CALL, ///< Invalid call
} SerdStatus;
/**
@@ -275,15 +276,6 @@ serd_canonical_path(const char* SERD_NONNULL path);
*/
/**
- Function to detect I/O stream errors.
-
- Identical semantics to `ferror`.
-
- @return Non-zero if `stream` has encountered an error.
-*/
-typedef int (*SerdStreamErrorFunc)(void* SERD_NONNULL stream);
-
-/**
Source function for raw string input.
Identical semantics to `fread`, but may set errno for more informative error
@@ -318,6 +310,24 @@ typedef size_t (*SerdWriteFunc)(const void* SERD_NONNULL buf,
void* SERD_NONNULL stream);
/**
+ Function to detect I/O stream errors.
+
+ Identical semantics to `ferror`.
+
+ @return Non-zero if `stream` has encountered an error.
+*/
+typedef int (*SerdStreamErrorFunc)(void* SERD_NONNULL stream);
+
+/**
+ Function to close an I/O stream.
+
+ Identical semantics to `fclose`.
+
+ @return Non-zero if `stream` could not be closed successfully.
+*/
+typedef int (*SerdStreamCloseFunc)(void* SERD_NONNULL stream);
+
+/**
@}
@defgroup serd_syntax Syntax Utilities
@{
@@ -1473,6 +1483,70 @@ serd_env_write_prefixes(const SerdEnv* SERD_NONNULL env,
/**
@}
+ @defgroup serd_byte_source Byte Source
+ @{
+*/
+
+/// A source for bytes that provides text input
+typedef struct SerdByteSourceImpl SerdByteSource;
+
+/**
+ Create a new byte source that reads from a string.
+
+ @param string Null-terminated UTF-8 string to read from.
+ @param name Optional name of stream for error messages (string or URI).
+*/
+SERD_API
+SerdByteSource* SERD_ALLOCATED
+serd_byte_source_new_string(const char* SERD_NONNULL string,
+ const SerdNode* SERD_NULLABLE name);
+
+/**
+ Create a new byte source that reads from a file.
+
+ An arbitrary `FILE*` can be used via serd_byte_source_new_function() as
+ well; this is just a convenience function that opens the file properly, sets
+ flags for optimized I/O if possible, and automatically sets the name of the
+ source to the file path.
+
+ @param path Path of file to open and read from.
+ @param page_size Number of bytes to read per call.
+*/
+SERD_API
+SerdByteSource* SERD_ALLOCATED
+serd_byte_source_new_filename(const char* SERD_NONNULL path, size_t page_size);
+
+/**
+ Create a new byte source that reads from a user-specified function.
+
+ The `stream` will be passed to the `read_func`, which is compatible with the
+ standard C `fread` if `stream` is a `FILE*`. Note that the reader only ever
+ reads individual bytes at a time, that is, the `size` parameter will always
+ be 1 (but `nmemb` may be higher).
+
+ @param read_func Stream read function, like `fread`.
+ @param error_func Stream error function, like `ferror`.
+ @param close_func Stream close function, like `fclose`.
+ @param stream Context parameter passed to `read_func`, `error_func`, and `close_func`.
+ @param name Optional name of stream for error messages (string or URI).
+ @param page_size Number of bytes to read per call.
+*/
+SERD_API
+SerdByteSource* SERD_ALLOCATED
+serd_byte_source_new_function(SerdReadFunc SERD_NONNULL read_func,
+ SerdStreamErrorFunc SERD_NONNULL error_func,
+ SerdStreamCloseFunc SERD_NULLABLE close_func,
+ void* SERD_NULLABLE stream,
+ const SerdNode* SERD_NULLABLE name,
+ size_t page_size);
+
+/// Free `source`, first closing its stream if it has a close function
+SERD_API
+void
+serd_byte_source_free(SerdByteSource* SERD_NULLABLE source);
+
+/**
+ @}
@defgroup serd_reader Reader
@{
*/
@@ -1511,34 +1585,11 @@ void
serd_reader_add_blank_prefix(SerdReader* SERD_NONNULL reader,
const char* SERD_NULLABLE prefix);
-/// Prepare to read from the file at a local file `uri`
-SERD_API
-SerdStatus
-serd_reader_start_file(SerdReader* SERD_NONNULL reader,
- const char* SERD_NONNULL uri,
- bool bulk);
-
-/**
- Prepare to read from a stream.
-
- The `read_func` is guaranteed to only be called for `page_size` elements
- with size 1 (i.e. `page_size` bytes).
-*/
-SERD_API
-SerdStatus
-serd_reader_start_stream(SerdReader* SERD_NONNULL reader,
- SerdReadFunc SERD_NONNULL read_func,
- SerdStreamErrorFunc SERD_NONNULL error_func,
- void* SERD_NONNULL stream,
- const SerdNode* SERD_NULLABLE name,
- size_t page_size);
-
-/// Prepare to read from a string
+/// Prepare to read from a byte source
SERD_API
SerdStatus
-serd_reader_start_string(SerdReader* SERD_NONNULL reader,
- const char* SERD_NONNULL utf8,
- const SerdNode* SERD_NULLABLE name);
+serd_reader_start(SerdReader* SERD_NONNULL reader,
+ SerdByteSource* SERD_NONNULL byte_source);
/**
Read a single "chunk" of data during an incremental read.
diff --git a/src/byte_source.c b/src/byte_source.c
index 2e4f66cb..ef6bf3bb 100644
--- a/src/byte_source.c
+++ b/src/byte_source.c
@@ -17,12 +17,21 @@
#include "byte_source.h"
#include "caret.h"
+#include "serd_config.h"
#include "system.h"
#include "serd/serd.h"
+#include <sys/stat.h>
+
+#if USE_POSIX_FADVISE && USE_FILENO
+# include <fcntl.h>
+#endif
+
#include <stdbool.h>
#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
SerdStatus
@@ -50,28 +59,34 @@ serd_byte_source_page(SerdByteSource* const source)
return SERD_SUCCESS;
}
-SerdStatus
-serd_byte_source_open_source(SerdByteSource* const source,
- const SerdReadFunc read_func,
- const SerdStreamErrorFunc error_func,
- const SerdStreamCloseFunc close_func,
- void* const stream,
- const SerdNode* const name,
- const size_t page_size)
+SerdByteSource*
+serd_byte_source_new_function(const SerdReadFunc read_func,
+ const SerdStreamErrorFunc error_func,
+ const SerdStreamCloseFunc close_func,
+ void* const stream,
+ const SerdNode* const name,
+ const size_t page_size)
{
- assert(page_size > 0);
- memset(source, '\0', sizeof(*source));
- source->read_func = read_func;
- source->error_func = error_func;
- source->close_func = close_func;
- source->stream = stream;
- source->page_size = page_size;
- source->buf_size = page_size;
- source->name = serd_node_copy(name);
- source->caret.file = source->name;
- source->caret.line = 1u;
- source->caret.col = 1u;
- source->from_stream = true;
+ if (!page_size) {
+ return NULL;
+ }
+
+ SerdByteSource* source = (SerdByteSource*)calloc(1, sizeof(SerdByteSource));
+
+ source->read_func = read_func;
+ source->error_func = error_func;
+ source->close_func = close_func;
+ source->stream = stream;
+ source->page_size = page_size;
+ source->buf_size = page_size;
+ source->type = FROM_FUNCTION;
+
+ source->name =
+ name ? serd_node_copy(name) : serd_new_string(SERD_STRING("func"));
+
+ source->caret.file = source->name;
+ source->caret.line = 1u;
+ source->caret.col = 1u;
if (page_size > 1) {
source->file_buf = (uint8_t*)serd_allocate_buffer(page_size);
@@ -81,58 +96,112 @@ serd_byte_source_open_source(SerdByteSource* const source,
source->read_buf = &source->read_byte;
}
- return SERD_SUCCESS;
+ return source;
}
-SerdStatus
-serd_byte_source_prepare(SerdByteSource* const source)
+static bool
+is_directory(const char* const path)
{
- if (source->page_size == 0) {
- return SERD_FAILURE;
+#ifdef _MSC_VER
+ struct stat st;
+ return !stat(path, &st) && (st.st_mode & _S_IFDIR);
+#else
+ struct stat st;
+ return !stat(path, &st) && S_ISDIR(st.st_mode);
+#endif
+}
+
+SerdByteSource*
+serd_byte_source_new_filename(const char* const path, const size_t page_size)
+{
+ if (page_size == 0 || is_directory(path)) {
+ return NULL;
}
- source->prepared = true;
+ FILE* const fd = fopen(path, "rb");
+ if (!fd) {
+ return NULL;
+ }
- if (source->from_stream) {
- return (source->page_size > 1 ? serd_byte_source_page(source)
- : serd_byte_source_advance(source));
+ SerdByteSource* source = (SerdByteSource*)calloc(1, sizeof(SerdByteSource));
+
+ source->read_func = (SerdReadFunc)fread;
+ source->error_func = (SerdStreamErrorFunc)ferror;
+ source->close_func = (SerdStreamCloseFunc)fclose;
+ source->stream = fd;
+ source->page_size = page_size;
+ source->buf_size = page_size;
+
+ source->name = serd_new_file_uri(SERD_STRING(path), SERD_EMPTY_STRING());
+ source->type = FROM_FILENAME;
+
+ source->caret.file = source->name;
+ source->caret.line = 1u;
+ source->caret.col = 1u;
+
+ if (page_size > 1) {
+ source->file_buf = (uint8_t*)serd_allocate_buffer(page_size);
+ source->read_buf = source->file_buf;
+ memset(source->file_buf, '\0', page_size);
+ } else {
+ source->read_buf = &source->read_byte;
}
- return SERD_SUCCESS;
+#if USE_POSIX_FADVISE && USE_FILENO
+ posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL);
+#endif
+
+ return source;
}
-SerdStatus
-serd_byte_source_open_string(SerdByteSource* const source,
- const char* const utf8,
- const SerdNode* const name)
+SerdByteSource*
+serd_byte_source_new_string(const char* const string,
+ const SerdNode* const name)
{
- memset(source, '\0', sizeof(*source));
+ SerdByteSource* source = (SerdByteSource*)calloc(1, sizeof(SerdByteSource));
source->page_size = 1;
+ source->read_buf = (const uint8_t*)string;
+ source->type = FROM_STRING;
+
source->name =
name ? serd_node_copy(name) : serd_new_string(SERD_STRING("string"));
- source->read_buf = (const uint8_t*)utf8;
source->caret.file = source->name;
source->caret.line = 1u;
source->caret.col = 1u;
- return SERD_SUCCESS;
+ return source;
}
SerdStatus
-serd_byte_source_close(SerdByteSource* const source)
+serd_byte_source_prepare(SerdByteSource* const source)
{
- SerdStatus st = SERD_SUCCESS;
- if (source->close_func) {
- st = source->close_func(source->stream) ? SERD_ERR_UNKNOWN : SERD_SUCCESS;
- }
+ source->prepared = true;
+ if (source->type != FROM_STRING) {
+ if (source->page_size > 1) {
+ return serd_byte_source_page(source);
+ }
- if (source->page_size > 1) {
- serd_free_aligned(source->file_buf);
+ return serd_byte_source_advance(source);
}
- serd_node_free(source->name);
- memset(source, '\0', sizeof(*source));
- return st;
+ return SERD_SUCCESS;
+}
+
+void
+serd_byte_source_free(SerdByteSource* const source)
+{
+ if (source) {
+ if (source->close_func) {
+ source->close_func(source->stream);
+ }
+
+ if (source->page_size > 1) {
+ serd_free_aligned(source->file_buf);
+ }
+
+ serd_node_free(source->name);
+ free(source);
+ }
}
diff --git a/src/byte_source.h b/src/byte_source.h
index 1c9fbd1f..d054e156 100644
--- a/src/byte_source.h
+++ b/src/byte_source.h
@@ -26,42 +26,29 @@
#include <stddef.h>
#include <stdint.h>
-typedef int (*SerdStreamCloseFunc)(void*);
-
-typedef struct {
- SerdReadFunc read_func; ///< Read function (e.g. fread)
- SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror)
- SerdStreamCloseFunc close_func; ///< Function for closing stream
- void* stream; ///< Stream (e.g. FILE)
- size_t page_size; ///< Number of bytes to read at a time
- size_t buf_size; ///< Number of bytes in file_buf
- SerdNode* name; ///< Name of stream (referenced by cur)
- SerdCaret caret; ///< Caret for error reporting
- uint8_t* file_buf; ///< Buffer iff reading pages from a file
- const uint8_t* read_buf; ///< Pointer to file_buf or read_byte
- size_t read_head; ///< Offset into read_buf
- uint8_t read_byte; ///< 1-byte 'buffer' used when not paging
- bool from_stream; ///< True iff reading from `stream`
- bool prepared; ///< True iff prepared for reading
- bool eof; ///< True iff end of file reached
-} SerdByteSource;
-
-SerdStatus
-serd_byte_source_open_string(SerdByteSource* source,
- const char* utf8,
- const SerdNode* name);
-
-SerdStatus
-serd_byte_source_open_source(SerdByteSource* source,
- SerdReadFunc read_func,
- SerdStreamErrorFunc error_func,
- SerdStreamCloseFunc close_func,
- void* stream,
- const SerdNode* name,
- size_t page_size);
-
-SerdStatus
-serd_byte_source_close(SerdByteSource* source);
+typedef enum {
+ FROM_STRING, ///< Reading from a user-provided buffer
+ FROM_FILENAME, ///< Reading from a file we opened
+ FROM_FUNCTION, ///< Reading from a user-provided function
+} SerdByteSourceType;
+
+struct SerdByteSourceImpl {
+ SerdReadFunc read_func; ///< Read function (e.g. fread)
+ SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror)
+ SerdStreamCloseFunc close_func; ///< Function for closing stream
+ void* stream; ///< Stream (e.g. FILE)
+ size_t page_size; ///< Number of bytes to read at a time
+ size_t buf_size; ///< Number of bytes in file_buf
+ SerdNode* name; ///< Name of stream (referenced by cur)
+ SerdCaret caret; ///< File position for error reporting
+ uint8_t* file_buf; ///< Buffer iff reading pages from a file
+ const uint8_t* read_buf; ///< Pointer to file_buf or read_byte
+ size_t read_head; ///< Offset into read_buf
+ SerdByteSourceType type; ///< Type of input
+ uint8_t read_byte; ///< 1-byte 'buffer' used when not paging
+ bool prepared; ///< True iff prepared for reading
+ bool eof; ///< True iff end of file reached
+};
SerdStatus
serd_byte_source_prepare(SerdByteSource* source);
@@ -92,7 +79,7 @@ serd_byte_source_advance(SerdByteSource* source)
++source->caret.col;
}
- if (source->from_stream) {
+ if (source->type != FROM_STRING) {
if (++source->read_head >= source->buf_size) {
st = serd_byte_source_page(source);
}
diff --git a/src/n3.c b/src/n3.c
index 46fe281b..a6aa71ba 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -1678,7 +1678,7 @@ skip_until(SerdReader* const reader, const uint8_t byte)
SerdStatus
read_turtleTrigDoc(SerdReader* const reader)
{
- while (!reader->source.eof) {
+ while (!reader->source->eof) {
const size_t orig_stack_size = reader->stack.size;
const SerdStatus st = read_n3_statement(reader);
if (st > SERD_FAILURE) {
@@ -1699,7 +1699,7 @@ SerdStatus
read_nquadsDoc(SerdReader* const reader)
{
SerdStatus st = SERD_SUCCESS;
- while (!st && !reader->source.eof) {
+ while (!st && !reader->source->eof) {
const size_t orig_stack_size = reader->stack.size;
SerdStatementFlags flags = 0;
diff --git a/src/reader.c b/src/reader.c
index ed6caafd..fe88ee14 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -37,7 +37,7 @@ r_err(SerdReader* const reader, const SerdStatus st, const char* const fmt, ...)
{
va_list args;
va_start(args, fmt);
- const SerdError e = {st, &reader->source.caret, fmt, &args};
+ const SerdError e = {st, &reader->source->caret, fmt, &args};
serd_world_error(reader->world, &e);
va_end(args);
return st;
@@ -69,7 +69,8 @@ tolerate_status(const SerdReader* const reader, const SerdStatus status)
}
if (status == SERD_ERR_INTERNAL || status == SERD_ERR_OVERFLOW ||
- status == SERD_ERR_BAD_WRITE || status == SERD_ERR_NO_DATA) {
+ status == SERD_ERR_BAD_WRITE || status == SERD_ERR_NO_DATA ||
+ status == SERD_ERR_BAD_CALL) {
return false;
}
@@ -145,7 +146,7 @@ emit_statement(SerdReader* const reader,
serd_node_zero_pad(o);
const SerdStatement statement = {{ctx.subject, ctx.predicate, o, ctx.graph},
- &reader->source.caret};
+ &reader->source->caret};
const SerdStatus st =
serd_sink_write_statement(reader->sink, *ctx.flags, &statement);
@@ -163,7 +164,11 @@ read_statement(SerdReader* const reader)
SerdStatus
serd_reader_read_document(SerdReader* const reader)
{
- if (!reader->source.prepared) {
+ if (!reader->source) {
+ return SERD_ERR_BAD_CALL;
+ }
+
+ if (!reader->source->prepared) {
SerdStatus st = serd_reader_prepare(reader);
if (st) {
return st;
@@ -241,12 +246,12 @@ serd_reader_add_blank_prefix(SerdReader* const reader, const char* const prefix)
static SerdStatus
skip_bom(SerdReader* const me)
{
- if (serd_byte_source_peek(&me->source) == 0xEF) {
- serd_byte_source_advance(&me->source);
- if (serd_byte_source_peek(&me->source) != 0xBB ||
- serd_byte_source_advance(&me->source) ||
- serd_byte_source_peek(&me->source) != 0xBF ||
- serd_byte_source_advance(&me->source)) {
+ if (serd_byte_source_peek(me->source) == 0xEF) {
+ serd_byte_source_advance(me->source);
+ if (serd_byte_source_peek(me->source) != 0xBB ||
+ serd_byte_source_advance(me->source) ||
+ serd_byte_source_peek(me->source) != 0xBF ||
+ serd_byte_source_advance(me->source)) {
r_err(me, SERD_ERR_BAD_SYNTAX, "corrupt byte order mark\n");
return SERD_ERR_BAD_SYNTAX;
}
@@ -256,60 +261,23 @@ skip_bom(SerdReader* const me)
}
SerdStatus
-serd_reader_start_stream(SerdReader* const reader,
- const SerdReadFunc read_func,
- const SerdStreamErrorFunc error_func,
- void* const stream,
- const SerdNode* const name,
- const size_t page_size)
+serd_reader_start(SerdReader* const reader, SerdByteSource* const byte_source)
{
- return serd_byte_source_open_source(
- &reader->source, read_func, error_func, NULL, stream, name, page_size);
-}
-
-SerdStatus
-serd_reader_start_file(SerdReader* reader, const char* uri, bool bulk)
-{
- char* const path = serd_parse_file_uri(uri, NULL);
- if (!path) {
- return SERD_ERR_BAD_ARG;
- }
-
- FILE* fd = serd_world_fopen(reader->world, path, "rb");
- free(path);
- if (!fd) {
- return SERD_ERR_UNKNOWN;
- }
+ serd_reader_finish(reader);
- SerdNode* const name = serd_new_uri(SERD_STRING(uri));
- const SerdStatus st = serd_byte_source_open_source(
- &reader->source,
- bulk ? (SerdReadFunc)fread : serd_file_read_byte,
- (SerdStreamErrorFunc)ferror,
- (SerdStreamCloseFunc)fclose,
- fd,
- name,
- bulk ? SERD_PAGE_SIZE : 1u);
- serd_node_free(name);
- return st;
-}
+ reader->source = byte_source;
-SerdStatus
-serd_reader_start_string(SerdReader* const reader,
- const char* const utf8,
- const SerdNode* const name)
-{
- return serd_byte_source_open_string(&reader->source, utf8, name);
+ return reader->source ? SERD_SUCCESS : SERD_ERR_BAD_ARG;
}
static SerdStatus
serd_reader_prepare(SerdReader* const reader)
{
- SerdStatus st = serd_byte_source_prepare(&reader->source);
+ SerdStatus st = serd_byte_source_prepare(reader->source);
if (st == SERD_SUCCESS) {
st = skip_bom(reader);
} else if (st == SERD_FAILURE) {
- reader->source.eof = true;
+ reader->source->eof = true;
}
return st;
}
@@ -318,10 +286,14 @@ SerdStatus
serd_reader_read_chunk(SerdReader* const reader)
{
SerdStatus st = SERD_SUCCESS;
- if (!reader->source.prepared) {
+ if (!reader->source) {
+ return SERD_ERR_BAD_CALL;
+ }
+
+ if (!reader->source->prepared) {
st = serd_reader_prepare(reader);
- } else if (reader->source.eof) {
- st = serd_byte_source_advance(&reader->source);
+ } else if (reader->source->eof) {
+ st = serd_byte_source_advance(reader->source);
}
return st ? st : read_statement(reader);
@@ -330,5 +302,6 @@ serd_reader_read_chunk(SerdReader* const reader)
SerdStatus
serd_reader_finish(SerdReader* const reader)
{
- return serd_byte_source_close(&reader->source);
+ reader->source = NULL;
+ return SERD_SUCCESS;
}
diff --git a/src/reader.h b/src/reader.h
index a8c154dd..76f46506 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -50,7 +50,7 @@ struct SerdReaderImpl {
SerdNode* rdf_first;
SerdNode* rdf_rest;
SerdNode* rdf_nil;
- SerdByteSource source;
+ SerdByteSource* source;
SerdStack stack;
SerdSyntax syntax;
unsigned next_id;
@@ -107,7 +107,7 @@ read_turtleTrigDoc(SerdReader* reader);
static inline int
peek_byte(SerdReader* reader)
{
- SerdByteSource* source = &reader->source;
+ SerdByteSource* source = reader->source;
return source->eof ? EOF : (int)source->read_buf[source->read_head];
}
@@ -120,7 +120,7 @@ eat_byte_safe(SerdReader* reader, const int byte)
const int c = peek_byte(reader);
assert(c == byte);
- serd_byte_source_advance(&reader->source);
+ serd_byte_source_advance(reader->source);
return c;
}
diff --git a/src/serdi.c b/src/serdi.c
index 2e04ae5a..3025b494 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -27,6 +27,7 @@
# include <io.h>
#endif
+#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
@@ -104,25 +105,36 @@ read_file(SerdWorld* const world,
syntax = syntax ? syntax : serd_guess_syntax(filename);
syntax = syntax ? syntax : SERD_TRIG;
- SerdStatus st = SERD_SUCCESS;
- SerdReader* reader = serd_reader_new(world, syntax, flags, sink, stack_size);
-
- serd_reader_add_blank_prefix(reader, add_prefix);
-
+ SerdByteSource* byte_source = NULL;
if (!strcmp(filename, "-")) {
SerdNode* name = serd_new_string(SERD_STRING("stdin"));
- st = serd_reader_start_stream(
- reader, serd_file_read_byte, (SerdStreamErrorFunc)ferror, stdin, name, 1);
+ byte_source = serd_byte_source_new_function(
+ serd_file_read_byte, (SerdStreamErrorFunc)ferror, NULL, stdin, name, 1);
serd_node_free(name);
} else {
- st = serd_reader_start_file(reader, filename, bulk_read);
+ byte_source =
+ serd_byte_source_new_filename(filename, bulk_read ? SERD_PAGE_SIZE : 1u);
+ }
+
+ if (!byte_source) {
+ SERDI_ERRORF(
+ "failed to open input file `%s' (%s)\n", filename, strerror(errno));
+
+ return SERD_ERR_UNKNOWN;
}
+ SerdReader* reader = serd_reader_new(world, syntax, flags, sink, stack_size);
+
+ serd_reader_add_blank_prefix(reader, add_prefix);
+
+ SerdStatus st = serd_reader_start(reader, byte_source);
+
st = st ? st : serd_reader_read_document(reader);
serd_reader_free(reader);
+ serd_byte_source_free(byte_source);
return st;
}
@@ -332,6 +344,9 @@ main(int argc, char** argv)
SerdStatus st = SERD_SUCCESS;
SerdNode* input_name = NULL;
if (input_string) {
+ SerdByteSource* const byte_source =
+ serd_byte_source_new_string(input_string, NULL);
+
SerdReader* const reader =
serd_reader_new(world,
input_syntax ? input_syntax : SERD_TRIG,
@@ -341,13 +356,12 @@ main(int argc, char** argv)
serd_reader_add_blank_prefix(reader, add_prefix);
- SerdNode* name = serd_new_string(SERD_STRING("string"));
- if (!(st = serd_reader_start_string(reader, input_string, name))) {
+ if (!(st = serd_reader_start(reader, byte_source))) {
st = serd_reader_read_document(reader);
}
- serd_node_free(name);
serd_reader_free(reader);
+ serd_byte_source_free(byte_source);
}
size_t prefix_len = 0;
diff --git a/src/string.c b/src/string.c
index 097e73ef..13fb9263 100644
--- a/src/string.c
+++ b/src/string.c
@@ -58,6 +58,8 @@ serd_strerror(const SerdStatus status)
return "Error writing to file";
case SERD_ERR_NO_DATA:
return "Unexpected end of input";
+ case SERD_ERR_BAD_CALL:
+ return "Invalid call";
}
return "Unknown error";
diff --git a/src/world.c b/src/world.c
index e0ce9201..bd70d615 100644
--- a/src/world.c
+++ b/src/world.c
@@ -19,14 +19,7 @@
#include "caret.h"
#include "namespaces.h"
#include "node.h"
-#include "serd_config.h"
-#include "system.h"
-#if defined(USE_POSIX_FADVISE)
-# include <fcntl.h>
-#endif
-
-#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@@ -34,26 +27,6 @@
#define BLANK_CHARS 12
-FILE*
-serd_world_fopen(SerdWorld* world, const char* path, const char* mode)
-{
- FILE* fd = fopen(path, mode);
- if (!fd) {
- char message[1024] = {0};
- serd_system_strerror(errno, message, sizeof(message));
-
- serd_world_errorf(
- world, SERD_ERR_INTERNAL, "failed to open file %s (%s)\n", path, message);
- return NULL;
- }
-
-#if USE_POSIX_FADVISE && USE_FILENO
- posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL);
-#endif
-
- return fd;
-}
-
SerdStatus
serd_world_error(const SerdWorld* const world, const SerdError* const e)
{
diff --git a/src/world.h b/src/world.h
index 44e21166..a70a6e28 100644
--- a/src/world.h
+++ b/src/world.h
@@ -20,7 +20,6 @@
#include "serd/serd.h"
#include <stdint.h>
-#include <stdio.h>
struct SerdWorldImpl {
SerdNodes* nodes;
@@ -37,10 +36,6 @@ struct SerdWorldImpl {
uint32_t next_blank_id;
};
-/// Open a file configured for fast sequential reading
-FILE*
-serd_world_fopen(SerdWorld* world, const char* path, const char* mode);
-
SerdStatus
serd_world_error(const SerdWorld* world, const SerdError* e);
diff --git a/test/meson.build b/test/meson.build
index b6c2ce2f..5063277b 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -6,6 +6,7 @@ wrapper = meson.get_cross_property('exe_wrapper', '')
unit_tests = [
'byte_sink',
+ 'byte_source',
'caret',
'env',
'free_null',
@@ -160,25 +161,32 @@ if get_option('utils')
# IO errors
test('read_dir', serdi,
- args: ['-e', 'file://@0@/'.format(meson.source_root())],
+ args: ['-e', meson.source_root()],
env: test_env,
should_fail: true,
suite: 'io_errors')
test('bulk_read_dir', serdi,
- args: ['file://@0@/'.format(meson.source_root())],
+ args: [meson.source_root()],
env: test_env,
should_fail: true,
suite: 'io_errors')
+ if host_machine.system() == 'linux'
+ test('unreadable', serdi,
+ args: ['/sys/bus/pci/rescan'],
+ env: test_env,
+ should_fail: true,
+ suite: 'io_errors')
+ endif
+
test('write_error', files('test_write_error.py'),
args: script_args + [serd_ttl],
env: test_env,
suite: 'io_errors')
test('write_bad_file', serdi,
- args: ['-w', '/does/not/exist.ttl',
- 'file://@0@/serd.ttl'.format(meson.source_root())],
+ args: ['-w', '/does/not/exist.ttl', meson.source_root() / 'serd.ttl'],
env: test_env,
should_fail: true,
suite: 'io_errors')
diff --git a/test/test_byte_source.c b/test/test_byte_source.c
new file mode 100644
index 00000000..14ef5819
--- /dev/null
+++ b/test/test_byte_source.c
@@ -0,0 +1,40 @@
+/*
+ Copyright 2021 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#undef NDEBUG
+
+#include "serd/serd.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdio.h>
+
+static void
+test_bad_page_size(void)
+{
+ assert(!serd_byte_source_new_filename("file.ttl", 0));
+
+ assert(!serd_byte_source_new_function(
+ (SerdReadFunc)fread, (SerdStreamErrorFunc)ferror, NULL, NULL, NULL, 0));
+}
+
+int
+main(void)
+{
+ test_bad_page_size();
+
+ return 0;
+}
diff --git a/test/test_free_null.c b/test/test_free_null.c
index 15e751a9..28f7563f 100644
--- a/test/test_free_null.c
+++ b/test/test_free_null.c
@@ -24,6 +24,7 @@ int
main(void)
{
serd_free(NULL);
+ serd_byte_source_free(NULL);
serd_byte_sink_free(NULL);
serd_node_free(NULL);
serd_world_free(NULL);
diff --git a/test/test_overflow.c b/test/test_overflow.c
index 936187a9..6d5c6d0c 100644
--- a/test/test_overflow.c
+++ b/test/test_overflow.c
@@ -30,15 +30,17 @@ test_size(SerdWorld* const world,
const SerdSyntax syntax,
const size_t stack_size)
{
- SerdSink* sink = serd_sink_new(NULL, NULL, NULL);
+ SerdSink* sink = serd_sink_new(NULL, NULL, NULL);
+ SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL);
SerdReader* const reader =
serd_reader_new(world, syntax, 0u, sink, stack_size);
assert(reader);
- serd_reader_start_string(reader, str, NULL);
+ serd_reader_start(reader, byte_source);
const SerdStatus st = serd_reader_read_document(reader);
serd_reader_free(reader);
+ serd_byte_source_free(byte_source);
serd_sink_free(sink);
return st;
diff --git a/test/test_read_chunk.c b/test/test_read_chunk.c
index 78cd402e..f2681e32 100644
--- a/test/test_read_chunk.c
+++ b/test/test_read_chunk.c
@@ -94,20 +94,22 @@ main(void)
SerdWorld* world = serd_world_new();
SerdSink* sink = serd_sink_new(NULL, on_event, NULL);
+ SerdByteSource* byte_source =
+ serd_byte_source_new_string("@prefix eg: <http://example.org/> .\n"
+ "@base <http://example.org/base> .\n"
+ "eg:s1 eg:p1 eg:o1 ;\n"
+ " eg:p2 eg:o2 ,\n"
+ " eg:o3 .\n"
+ "eg:s2 eg:p1 eg:o1 ;\n"
+ " eg:p2 eg:o2 .\n"
+ "eg:s3 eg:p1 eg:o1 .\n"
+ "eg:s4 eg:p1 [ eg:p3 eg:o1 ] .\n",
+ NULL);
+
SerdReader* reader = serd_reader_new(world, SERD_TURTLE, 0, sink, 4096);
assert(reader);
- assert(!serd_reader_start_string(reader,
- "@prefix eg: <http://example.org/> .\n"
- "@base <http://example.org/base> .\n"
- "eg:s1 eg:p1 eg:o1 ;\n"
- " eg:p2 eg:o2 ,\n"
- " eg:o3 .\n"
- "eg:s2 eg:p1 eg:o1 ;\n"
- " eg:p2 eg:o2 .\n"
- "eg:s3 eg:p1 eg:o1 .\n"
- "eg:s4 eg:p1 [ eg:p3 eg:o1 ] .\n",
- NULL));
+ assert(!serd_reader_start(reader, byte_source));
assert(!serd_reader_read_chunk(reader) && n_prefix == 1);
assert(!serd_reader_read_chunk(reader) && n_base == 1);
@@ -121,6 +123,7 @@ main(void)
assert(!serd_reader_finish(reader));
serd_reader_free(reader);
+ serd_byte_source_free(byte_source);
serd_sink_free(sink);
serd_world_free(world);
diff --git a/test/test_reader.c b/test/test_reader.c
index 532411c6..7bc8ab48 100644
--- a/test/test_reader.c
+++ b/test/test_reader.c
@@ -32,6 +32,55 @@ count_statements(void* handle, const SerdEvent* event)
return SERD_SUCCESS;
}
+SERD_PURE_FUNC
+static size_t
+prepare_test_read(void* buf, size_t size, size_t nmemb, void* stream)
+{
+ assert(size == 1);
+ assert(nmemb == 1);
+
+ (void)buf;
+ (void)size;
+ (void)nmemb;
+ (void)stream;
+
+ return 0;
+}
+
+static int
+prepare_test_error(void* stream)
+{
+ (void)stream;
+ return 1;
+}
+
+static void
+test_prepare_error(void)
+{
+ SerdWorld* const world = serd_world_new();
+ size_t n_statements = 0;
+ FILE* const f = tmpfile();
+
+ SerdSink* const sink = serd_sink_new(&n_statements, count_statements, NULL);
+ assert(sink);
+
+ SerdReader* const reader = serd_reader_new(world, SERD_TURTLE, 0, sink, 4096);
+ assert(reader);
+
+ SerdByteSource* byte_source = serd_byte_source_new_function(
+ prepare_test_read, prepare_test_error, NULL, f, NULL, 1);
+
+ SerdStatus st = serd_reader_start(reader, byte_source);
+ assert(!st);
+
+ assert(serd_reader_read_document(reader) == SERD_ERR_UNKNOWN);
+
+ serd_byte_source_free(byte_source);
+ serd_reader_free(reader);
+ serd_sink_free(sink);
+ serd_world_free(world);
+}
+
static void
test_read_string(void)
{
@@ -44,18 +93,33 @@ test_read_string(void)
SerdReader* reader = serd_reader_new(world, SERD_TURTLE, 0u, sink, 4096);
assert(reader);
- // Test reading a string that ends exactly at the end of input (no newline)
- assert(
- !serd_reader_start_string(reader,
- "<http://example.org/s> <http://example.org/p> "
- "<http://example.org/o> .",
- NULL));
+ SerdByteSource* byte_source =
+ serd_byte_source_new_string("<http://example.org/s> <http://example.org/p> "
+ "<http://example.org/o> .",
+ NULL);
+ // Test reading a string that ends exactly at the end of input (no newline)
+ assert(!serd_reader_start(reader, byte_source));
assert(!serd_reader_read_document(reader));
assert(n_statements == 1);
assert(!serd_reader_finish(reader));
+ // Test reading the same but as a chunk
+ serd_byte_source_free(byte_source);
+ n_statements = 0;
+ byte_source =
+ serd_byte_source_new_string("<http://example.org/s> <http://example.org/p> "
+ "<http://example.org/o> , _:blank .",
+ NULL);
+
+ assert(!serd_reader_start(reader, byte_source));
+ assert(!serd_reader_read_chunk(reader));
+ assert(n_statements == 2);
+ assert(serd_reader_read_chunk(reader) == SERD_FAILURE);
+ assert(!serd_reader_finish(reader));
+
serd_reader_free(reader);
+ serd_byte_source_free(byte_source);
serd_sink_free(sink);
serd_world_free(world);
}
@@ -120,13 +184,16 @@ test_read_eof_by_page(void)
SerdSink* sink = serd_sink_new(&ignored, count_statements, NULL);
SerdReader* reader = serd_reader_new(world, SERD_TURTLE, 0u, sink, 4096);
- serd_reader_start_stream(
- reader, (SerdReadFunc)fread, (SerdStreamErrorFunc)ferror, temp, NULL, 4096);
+ SerdByteSource* byte_source = serd_byte_source_new_function(
+ (SerdReadFunc)fread, (SerdStreamErrorFunc)ferror, NULL, temp, NULL, 4096);
+ assert(serd_reader_start(reader, byte_source) == SERD_SUCCESS);
assert(serd_reader_read_chunk(reader) == SERD_SUCCESS);
assert(serd_reader_read_chunk(reader) == SERD_FAILURE);
assert(serd_reader_read_chunk(reader) == SERD_FAILURE);
+ assert(!serd_reader_finish(reader));
+ serd_byte_source_free(byte_source);
serd_reader_free(reader);
serd_sink_free(sink);
serd_world_free(world);
@@ -142,19 +209,23 @@ test_read_eof_by_byte(void)
SerdSink* sink = serd_sink_new(&ignored, count_statements, NULL);
SerdReader* reader = serd_reader_new(world, SERD_TURTLE, 0u, sink, 4096);
- size_t n_reads = 0u;
- serd_reader_start_stream(reader,
- (SerdReadFunc)eof_test_read,
- (SerdStreamErrorFunc)eof_test_error,
- &n_reads,
- NULL,
- 1);
+ size_t n_reads = 0u;
+ SerdByteSource* byte_source =
+ serd_byte_source_new_function((SerdReadFunc)eof_test_read,
+ (SerdStreamErrorFunc)eof_test_error,
+ NULL,
+ &n_reads,
+ NULL,
+ 1);
+ assert(serd_reader_start(reader, byte_source) == SERD_SUCCESS);
assert(serd_reader_read_chunk(reader) == SERD_SUCCESS);
assert(serd_reader_read_chunk(reader) == SERD_FAILURE);
assert(serd_reader_read_chunk(reader) == SERD_SUCCESS);
assert(serd_reader_read_chunk(reader) == SERD_FAILURE);
+ assert(!serd_reader_finish(reader));
+ serd_byte_source_free(byte_source);
serd_reader_free(reader);
serd_sink_free(sink);
serd_world_free(world);
@@ -175,8 +246,10 @@ test_read_chunks(void)
serd_reader_new(world, SERD_TURTLE, 0u, sink, 4096);
assert(reader);
- SerdStatus st = serd_reader_start_stream(
- reader, (SerdReadFunc)fread, (SerdStreamErrorFunc)ferror, f, NULL, 1);
+ SerdByteSource* byte_source = serd_byte_source_new_function(
+ (SerdReadFunc)fread, (SerdStreamErrorFunc)ferror, NULL, f, NULL, 1);
+
+ SerdStatus st = serd_reader_start(reader, byte_source);
assert(st == SERD_SUCCESS);
// Write two statement separated by null characters
@@ -217,6 +290,7 @@ test_read_chunks(void)
assert(st == SERD_FAILURE);
assert(n_statements == 2);
+ serd_byte_source_free(byte_source);
serd_reader_free(reader);
serd_sink_free(sink);
fclose(f);
@@ -226,6 +300,7 @@ test_read_chunks(void)
int
main(void)
{
+ test_prepare_error();
test_read_string();
test_read_eof_by_page();
test_read_eof_by_byte();
diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c
index 8671a649..3a9bf7c9 100644
--- a/test/test_reader_writer.c
+++ b/test/test_reader_writer.c
@@ -19,7 +19,6 @@
#include "serd/serd.h"
#include <assert.h>
-#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
@@ -149,8 +148,8 @@ test_reader(const char* path)
SerdReader* reader = serd_reader_new(world, SERD_TURTLE, 0u, sink, 4096);
assert(reader);
- assert(serd_reader_read_document(reader) == SERD_FAILURE);
- assert(serd_reader_read_chunk(reader) == SERD_FAILURE);
+ assert(serd_reader_read_document(reader) == SERD_ERR_BAD_CALL);
+ assert(serd_reader_read_chunk(reader) == SERD_ERR_BAD_CALL);
serd_reader_add_blank_prefix(reader, "tmp");
@@ -163,14 +162,12 @@ test_reader(const char* path)
# pragma GCC diagnostic pop
#endif
- assert(serd_reader_start_file(reader, "http://notafile", false));
- assert(serd_reader_start_file(reader, "file://invalid", false));
- assert(serd_reader_start_file(reader, "file:///nonexistant", false));
-
- assert(!serd_reader_start_file(reader, path, true));
+ SerdByteSource* byte_source = serd_byte_source_new_filename(path, 4096);
+ assert(!serd_reader_start(reader, byte_source));
assert(!serd_reader_read_document(reader));
assert(n_statements == 6);
serd_reader_finish(reader);
+ serd_byte_source_free(byte_source);
serd_reader_free(reader);
serd_sink_free(sink);
diff --git a/test/test_string.c b/test/test_string.c
index 5bc304d1..495138d8 100644
--- a/test/test_string.c
+++ b/test/test_string.c
@@ -44,7 +44,7 @@ test_strerror(void)
{
const char* msg = serd_strerror(SERD_SUCCESS);
assert(!strcmp(msg, "Success"));
- for (int i = SERD_FAILURE; i <= SERD_ERR_NO_DATA; ++i) {
+ for (int i = SERD_FAILURE; i <= SERD_ERR_BAD_CALL; ++i) {
msg = serd_strerror((SerdStatus)i);
assert(strcmp(msg, "Success"));
}