about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2020-06-28 23:26:48 +0200
committer David Robillard <d@drobilla.net> 2022-01-13 23:05:24 -0500
commit 55e28966226268a57edb07419ac419ef53ac437d (patch)
tree 317a3e50480f97a415dae5a7d096726a0c730c0a /src
parent b98bd7a32cf4302e0a210dd8558edd3ab2088525 (diff)
download serd-55e28966226268a57edb07419ac419ef53ac437d.tar.gz
serd-55e28966226268a57edb07419ac419ef53ac437d.tar.bz2
serd-55e28966226268a57edb07419ac419ef53ac437d.zip
Make Reader always read from a ByteSource
Diffstat (limited to 'src')
-rw-r--r-- src/byte_source.c 165
-rw-r--r-- src/byte_source.h 61
-rw-r--r-- src/n3.c 4
-rw-r--r-- src/reader.c 87
-rw-r--r-- src/reader.h 6
-rw-r--r-- src/serdi.c 36
-rw-r--r-- src/string.c 2
-rw-r--r-- src/world.c 27
-rw-r--r-- src/world.h 5
9 files changed, 203 insertions, 190 deletions
diff --git a/src/byte_source.c b/src/byte_source.c
index 2e4f66cb..ef6bf3bb 100644
--- a/src/byte_source.c
+++ b/src/byte_source.c
@@ -17,12 +17,21 @@
#include "byte_source.h"
#include "caret.h"
+#include "serd_config.h"
#include "system.h"
#include "serd/serd.h"
+#include <sys/stat.h>
+
+#if USE_POSIX_FADVISE && USE_FILENO
+# include <fcntl.h>
+#endif
+
#include <stdbool.h>
#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
SerdStatus
@@ -50,28 +59,34 @@ serd_byte_source_page(SerdByteSource* const source)
return SERD_SUCCESS;
}
-SerdStatus
-serd_byte_source_open_source(SerdByteSource* const source,
- const SerdReadFunc read_func,
- const SerdStreamErrorFunc error_func,
- const SerdStreamCloseFunc close_func,
- void* const stream,
- const SerdNode* const name,
- const size_t page_size)
+SerdByteSource*
+serd_byte_source_new_function(const SerdReadFunc read_func,
+ const SerdStreamErrorFunc error_func,
+ const SerdStreamCloseFunc close_func,
+ void* const stream,
+ const SerdNode* const name,
+ const size_t page_size)
{
- assert(page_size > 0);
- memset(source, '\0', sizeof(*source));
- source->read_func = read_func;
- source->error_func = error_func;
- source->close_func = close_func;
- source->stream = stream;
- source->page_size = page_size;
- source->buf_size = page_size;
- source->name = serd_node_copy(name);
- source->caret.file = source->name;
- source->caret.line = 1u;
- source->caret.col = 1u;
- source->from_stream = true;
+ if (!page_size) {
+ return NULL;
+ }
+
+ SerdByteSource* source = (SerdByteSource*)calloc(1, sizeof(SerdByteSource));
+
+ source->read_func = read_func;
+ source->error_func = error_func;
+ source->close_func = close_func;
+ source->stream = stream;
+ source->page_size = page_size;
+ source->buf_size = page_size;
+ source->type = FROM_FUNCTION;
+
+ source->name =
+ name ? serd_node_copy(name) : serd_new_string(SERD_STRING("func"));
+
+ source->caret.file = source->name;
+ source->caret.line = 1u;
+ source->caret.col = 1u;
if (page_size > 1) {
source->file_buf = (uint8_t*)serd_allocate_buffer(page_size);
@@ -81,58 +96,112 @@ serd_byte_source_open_source(SerdByteSource* const source,
source->read_buf = &source->read_byte;
}
- return SERD_SUCCESS;
+ return source;
}
-SerdStatus
-serd_byte_source_prepare(SerdByteSource* const source)
+static bool
+is_directory(const char* const path)
{
- if (source->page_size == 0) {
- return SERD_FAILURE;
+#ifdef _MSC_VER
+ struct stat st;
+ return !stat(path, &st) && (st.st_mode & _S_IFDIR);
+#else
+ struct stat st;
+ return !stat(path, &st) && S_ISDIR(st.st_mode);
+#endif
+}
+
+SerdByteSource*
+serd_byte_source_new_filename(const char* const path, const size_t page_size)
+{
+ if (page_size == 0 || is_directory(path)) {
+ return NULL;
}
- source->prepared = true;
+ FILE* const fd = fopen(path, "rb");
+ if (!fd) {
+ return NULL;
+ }
- if (source->from_stream) {
- return (source->page_size > 1 ? serd_byte_source_page(source)
- : serd_byte_source_advance(source));
+ SerdByteSource* source = (SerdByteSource*)calloc(1, sizeof(SerdByteSource));
+
+ source->read_func = (SerdReadFunc)fread;
+ source->error_func = (SerdStreamErrorFunc)ferror;
+ source->close_func = (SerdStreamCloseFunc)fclose;
+ source->stream = fd;
+ source->page_size = page_size;
+ source->buf_size = page_size;
+
+ source->name = serd_new_file_uri(SERD_STRING(path), SERD_EMPTY_STRING());
+ source->type = FROM_FILENAME;
+
+ source->caret.file = source->name;
+ source->caret.line = 1u;
+ source->caret.col = 1u;
+
+ if (page_size > 1) {
+ source->file_buf = (uint8_t*)serd_allocate_buffer(page_size);
+ source->read_buf = source->file_buf;
+ memset(source->file_buf, '\0', page_size);
+ } else {
+ source->read_buf = &source->read_byte;
}
- return SERD_SUCCESS;
+#if USE_POSIX_FADVISE && USE_FILENO
+ posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL);
+#endif
+
+ return source;
}
-SerdStatus
-serd_byte_source_open_string(SerdByteSource* const source,
- const char* const utf8,
- const SerdNode* const name)
+SerdByteSource*
+serd_byte_source_new_string(const char* const string,
+ const SerdNode* const name)
{
- memset(source, '\0', sizeof(*source));
+ SerdByteSource* source = (SerdByteSource*)calloc(1, sizeof(SerdByteSource));
source->page_size = 1;
+ source->read_buf = (const uint8_t*)string;
+ source->type = FROM_STRING;
+
source->name =
name ? serd_node_copy(name) : serd_new_string(SERD_STRING("string"));
- source->read_buf = (const uint8_t*)utf8;
source->caret.file = source->name;
source->caret.line = 1u;
source->caret.col = 1u;
- return SERD_SUCCESS;
+ return source;
}
SerdStatus
-serd_byte_source_close(SerdByteSource* const source)
+serd_byte_source_prepare(SerdByteSource* const source)
{
- SerdStatus st = SERD_SUCCESS;
- if (source->close_func) {
- st = source->close_func(source->stream) ? SERD_ERR_UNKNOWN : SERD_SUCCESS;
- }
+ source->prepared = true;
+ if (source->type != FROM_STRING) {
+ if (source->page_size > 1) {
+ return serd_byte_source_page(source);
+ }
- if (source->page_size > 1) {
- serd_free_aligned(source->file_buf);
+ return serd_byte_source_advance(source);
}
- serd_node_free(source->name);
- memset(source, '\0', sizeof(*source));
- return st;
+ return SERD_SUCCESS;
+}
+
+void
+serd_byte_source_free(SerdByteSource* const source)
+{
+ if (source) {
+ if (source->close_func) {
+ source->close_func(source->stream);
+ }
+
+ if (source->page_size > 1) {
+ serd_free_aligned(source->file_buf);
+ }
+
+ serd_node_free(source->name);
+ free(source);
+ }
}
diff --git a/src/byte_source.h b/src/byte_source.h
index 1c9fbd1f..d054e156 100644
--- a/src/byte_source.h
+++ b/src/byte_source.h
@@ -26,42 +26,29 @@
#include <stddef.h>
#include <stdint.h>
-typedef int (*SerdStreamCloseFunc)(void*);
-
-typedef struct {
- SerdReadFunc read_func; ///< Read function (e.g. fread)
- SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror)
- SerdStreamCloseFunc close_func; ///< Function for closing stream
- void* stream; ///< Stream (e.g. FILE)
- size_t page_size; ///< Number of bytes to read at a time
- size_t buf_size; ///< Number of bytes in file_buf
- SerdNode* name; ///< Name of stream (referenced by cur)
- SerdCaret caret; ///< Caret for error reporting
- uint8_t* file_buf; ///< Buffer iff reading pages from a file
- const uint8_t* read_buf; ///< Pointer to file_buf or read_byte
- size_t read_head; ///< Offset into read_buf
- uint8_t read_byte; ///< 1-byte 'buffer' used when not paging
- bool from_stream; ///< True iff reading from `stream`
- bool prepared; ///< True iff prepared for reading
- bool eof; ///< True iff end of file reached
-} SerdByteSource;
-
-SerdStatus
-serd_byte_source_open_string(SerdByteSource* source,
- const char* utf8,
- const SerdNode* name);
-
-SerdStatus
-serd_byte_source_open_source(SerdByteSource* source,
- SerdReadFunc read_func,
- SerdStreamErrorFunc error_func,
- SerdStreamCloseFunc close_func,
- void* stream,
- const SerdNode* name,
- size_t page_size);
-
-SerdStatus
-serd_byte_source_close(SerdByteSource* source);
+typedef enum {
+ FROM_STRING, ///< Reading from a user-provided buffer
+ FROM_FILENAME, ///< Reading from a file we opened
+ FROM_FUNCTION, ///< Reading from a user-provided function
+} SerdByteSourceType;
+
+struct SerdByteSourceImpl {
+ SerdReadFunc read_func; ///< Read function (e.g. fread)
+ SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror)
+ SerdStreamCloseFunc close_func; ///< Function for closing stream
+ void* stream; ///< Stream (e.g. FILE)
+ size_t page_size; ///< Number of bytes to read at a time
+ size_t buf_size; ///< Number of bytes in file_buf
+ SerdNode* name; ///< Name of stream (referenced by cur)
+ SerdCaret caret; ///< File position for error reporting
+ uint8_t* file_buf; ///< Buffer iff reading pages from a file
+ const uint8_t* read_buf; ///< Pointer to file_buf or read_byte
+ size_t read_head; ///< Offset into read_buf
+ SerdByteSourceType type; ///< Type of input
+ uint8_t read_byte; ///< 1-byte 'buffer' used when not paging
+ bool prepared; ///< True iff prepared for reading
+ bool eof; ///< True iff end of file reached
+};
SerdStatus
serd_byte_source_prepare(SerdByteSource* source);
@@ -92,7 +79,7 @@ serd_byte_source_advance(SerdByteSource* source)
++source->caret.col;
}
- if (source->from_stream) {
+ if (source->type != FROM_STRING) {
if (++source->read_head >= source->buf_size) {
st = serd_byte_source_page(source);
}
diff --git a/src/n3.c b/src/n3.c
index 46fe281b..a6aa71ba 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -1678,7 +1678,7 @@ skip_until(SerdReader* const reader, const uint8_t byte)
SerdStatus
read_turtleTrigDoc(SerdReader* const reader)
{
- while (!reader->source.eof) {
+ while (!reader->source->eof) {
const size_t orig_stack_size = reader->stack.size;
const SerdStatus st = read_n3_statement(reader);
if (st > SERD_FAILURE) {
@@ -1699,7 +1699,7 @@ SerdStatus
read_nquadsDoc(SerdReader* const reader)
{
SerdStatus st = SERD_SUCCESS;
- while (!st && !reader->source.eof) {
+ while (!st && !reader->source->eof) {
const size_t orig_stack_size = reader->stack.size;
SerdStatementFlags flags = 0;
diff --git a/src/reader.c b/src/reader.c
index ed6caafd..fe88ee14 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -37,7 +37,7 @@ r_err(SerdReader* const reader, const SerdStatus st, const char* const fmt, ...)
{
va_list args;
va_start(args, fmt);
- const SerdError e = {st, &reader->source.caret, fmt, &args};
+ const SerdError e = {st, &reader->source->caret, fmt, &args};
serd_world_error(reader->world, &e);
va_end(args);
return st;
@@ -69,7 +69,8 @@ tolerate_status(const SerdReader* const reader, const SerdStatus status)
}
if (status == SERD_ERR_INTERNAL || status == SERD_ERR_OVERFLOW ||
- status == SERD_ERR_BAD_WRITE || status == SERD_ERR_NO_DATA) {
+ status == SERD_ERR_BAD_WRITE || status == SERD_ERR_NO_DATA ||
+ status == SERD_ERR_BAD_CALL) {
return false;
}
@@ -145,7 +146,7 @@ emit_statement(SerdReader* const reader,
serd_node_zero_pad(o);
const SerdStatement statement = {{ctx.subject, ctx.predicate, o, ctx.graph},
- &reader->source.caret};
+ &reader->source->caret};
const SerdStatus st =
serd_sink_write_statement(reader->sink, *ctx.flags, &statement);
@@ -163,7 +164,11 @@ read_statement(SerdReader* const reader)
SerdStatus
serd_reader_read_document(SerdReader* const reader)
{
- if (!reader->source.prepared) {
+ if (!reader->source) {
+ return SERD_ERR_BAD_CALL;
+ }
+
+ if (!reader->source->prepared) {
SerdStatus st = serd_reader_prepare(reader);
if (st) {
return st;
@@ -241,12 +246,12 @@ serd_reader_add_blank_prefix(SerdReader* const reader, const char* const prefix)
static SerdStatus
skip_bom(SerdReader* const me)
{
- if (serd_byte_source_peek(&me->source) == 0xEF) {
- serd_byte_source_advance(&me->source);
- if (serd_byte_source_peek(&me->source) != 0xBB ||
- serd_byte_source_advance(&me->source) ||
- serd_byte_source_peek(&me->source) != 0xBF ||
- serd_byte_source_advance(&me->source)) {
+ if (serd_byte_source_peek(me->source) == 0xEF) {
+ serd_byte_source_advance(me->source);
+ if (serd_byte_source_peek(me->source) != 0xBB ||
+ serd_byte_source_advance(me->source) ||
+ serd_byte_source_peek(me->source) != 0xBF ||
+ serd_byte_source_advance(me->source)) {
r_err(me, SERD_ERR_BAD_SYNTAX, "corrupt byte order mark\n");
return SERD_ERR_BAD_SYNTAX;
}
@@ -256,60 +261,23 @@ skip_bom(SerdReader* const me)
}
SerdStatus
-serd_reader_start_stream(SerdReader* const reader,
- const SerdReadFunc read_func,
- const SerdStreamErrorFunc error_func,
- void* const stream,
- const SerdNode* const name,
- const size_t page_size)
+serd_reader_start(SerdReader* const reader, SerdByteSource* const byte_source)
{
- return serd_byte_source_open_source(
- &reader->source, read_func, error_func, NULL, stream, name, page_size);
-}
-
-SerdStatus
-serd_reader_start_file(SerdReader* reader, const char* uri, bool bulk)
-{
- char* const path = serd_parse_file_uri(uri, NULL);
- if (!path) {
- return SERD_ERR_BAD_ARG;
- }
-
- FILE* fd = serd_world_fopen(reader->world, path, "rb");
- free(path);
- if (!fd) {
- return SERD_ERR_UNKNOWN;
- }
+ serd_reader_finish(reader);
- SerdNode* const name = serd_new_uri(SERD_STRING(uri));
- const SerdStatus st = serd_byte_source_open_source(
- &reader->source,
- bulk ? (SerdReadFunc)fread : serd_file_read_byte,
- (SerdStreamErrorFunc)ferror,
- (SerdStreamCloseFunc)fclose,
- fd,
- name,
- bulk ? SERD_PAGE_SIZE : 1u);
- serd_node_free(name);
- return st;
-}
+ reader->source = byte_source;
-SerdStatus
-serd_reader_start_string(SerdReader* const reader,
- const char* const utf8,
- const SerdNode* const name)
-{
- return serd_byte_source_open_string(&reader->source, utf8, name);
+ return reader->source ? SERD_SUCCESS : SERD_ERR_BAD_ARG;
}
static SerdStatus
serd_reader_prepare(SerdReader* const reader)
{
- SerdStatus st = serd_byte_source_prepare(&reader->source);
+ SerdStatus st = serd_byte_source_prepare(reader->source);
if (st == SERD_SUCCESS) {
st = skip_bom(reader);
} else if (st == SERD_FAILURE) {
- reader->source.eof = true;
+ reader->source->eof = true;
}
return st;
}
@@ -318,10 +286,14 @@ SerdStatus
serd_reader_read_chunk(SerdReader* const reader)
{
SerdStatus st = SERD_SUCCESS;
- if (!reader->source.prepared) {
+ if (!reader->source) {
+ return SERD_ERR_BAD_CALL;
+ }
+
+ if (!reader->source->prepared) {
st = serd_reader_prepare(reader);
- } else if (reader->source.eof) {
- st = serd_byte_source_advance(&reader->source);
+ } else if (reader->source->eof) {
+ st = serd_byte_source_advance(reader->source);
}
return st ? st : read_statement(reader);
@@ -330,5 +302,6 @@ serd_reader_read_chunk(SerdReader* const reader)
SerdStatus
serd_reader_finish(SerdReader* const reader)
{
- return serd_byte_source_close(&reader->source);
+ reader->source = NULL;
+ return SERD_SUCCESS;
}
diff --git a/src/reader.h b/src/reader.h
index a8c154dd..76f46506 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -50,7 +50,7 @@ struct SerdReaderImpl {
SerdNode* rdf_first;
SerdNode* rdf_rest;
SerdNode* rdf_nil;
- SerdByteSource source;
+ SerdByteSource* source;
SerdStack stack;
SerdSyntax syntax;
unsigned next_id;
@@ -107,7 +107,7 @@ read_turtleTrigDoc(SerdReader* reader);
static inline int
peek_byte(SerdReader* reader)
{
- SerdByteSource* source = &reader->source;
+ SerdByteSource* source = reader->source;
return source->eof ? EOF : (int)source->read_buf[source->read_head];
}
@@ -120,7 +120,7 @@ eat_byte_safe(SerdReader* reader, const int byte)
const int c = peek_byte(reader);
assert(c == byte);
- serd_byte_source_advance(&reader->source);
+ serd_byte_source_advance(reader->source);
return c;
}
diff --git a/src/serdi.c b/src/serdi.c
index 2e04ae5a..3025b494 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -27,6 +27,7 @@
# include <io.h>
#endif
+#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
@@ -104,25 +105,36 @@ read_file(SerdWorld* const world,
syntax = syntax ? syntax : serd_guess_syntax(filename);
syntax = syntax ? syntax : SERD_TRIG;
- SerdStatus st = SERD_SUCCESS;
- SerdReader* reader = serd_reader_new(world, syntax, flags, sink, stack_size);
-
- serd_reader_add_blank_prefix(reader, add_prefix);
-
+ SerdByteSource* byte_source = NULL;
if (!strcmp(filename, "-")) {
SerdNode* name = serd_new_string(SERD_STRING("stdin"));
- st = serd_reader_start_stream(
- reader, serd_file_read_byte, (SerdStreamErrorFunc)ferror, stdin, name, 1);
+ byte_source = serd_byte_source_new_function(
+ serd_file_read_byte, (SerdStreamErrorFunc)ferror, NULL, stdin, name, 1);
serd_node_free(name);
} else {
- st = serd_reader_start_file(reader, filename, bulk_read);
+ byte_source =
+ serd_byte_source_new_filename(filename, bulk_read ? SERD_PAGE_SIZE : 1u);
+ }
+
+ if (!byte_source) {
+ SERDI_ERRORF(
+ "failed to open input file `%s' (%s)\n", filename, strerror(errno));
+
+ return SERD_ERR_UNKNOWN;
}
+ SerdReader* reader = serd_reader_new(world, syntax, flags, sink, stack_size);
+
+ serd_reader_add_blank_prefix(reader, add_prefix);
+
+ SerdStatus st = serd_reader_start(reader, byte_source);
+
st = st ? st : serd_reader_read_document(reader);
serd_reader_free(reader);
+ serd_byte_source_free(byte_source);
return st;
}
@@ -332,6 +344,9 @@ main(int argc, char** argv)
SerdStatus st = SERD_SUCCESS;
SerdNode* input_name = NULL;
if (input_string) {
+ SerdByteSource* const byte_source =
+ serd_byte_source_new_string(input_string, NULL);
+
SerdReader* const reader =
serd_reader_new(world,
input_syntax ? input_syntax : SERD_TRIG,
@@ -341,13 +356,12 @@ main(int argc, char** argv)
serd_reader_add_blank_prefix(reader, add_prefix);
- SerdNode* name = serd_new_string(SERD_STRING("string"));
- if (!(st = serd_reader_start_string(reader, input_string, name))) {
+ if (!(st = serd_reader_start(reader, byte_source))) {
st = serd_reader_read_document(reader);
}
- serd_node_free(name);
serd_reader_free(reader);
+ serd_byte_source_free(byte_source);
}
size_t prefix_len = 0;
diff --git a/src/string.c b/src/string.c
index 097e73ef..13fb9263 100644
--- a/src/string.c
+++ b/src/string.c
@@ -58,6 +58,8 @@ serd_strerror(const SerdStatus status)
return "Error writing to file";
case SERD_ERR_NO_DATA:
return "Unexpected end of input";
+ case SERD_ERR_BAD_CALL:
+ return "Invalid call";
}
return "Unknown error";
diff --git a/src/world.c b/src/world.c
index e0ce9201..bd70d615 100644
--- a/src/world.c
+++ b/src/world.c
@@ -19,14 +19,7 @@
#include "caret.h"
#include "namespaces.h"
#include "node.h"
-#include "serd_config.h"
-#include "system.h"
-#if defined(USE_POSIX_FADVISE)
-# include <fcntl.h>
-#endif
-
-#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@@ -34,26 +27,6 @@
#define BLANK_CHARS 12
-FILE*
-serd_world_fopen(SerdWorld* world, const char* path, const char* mode)
-{
- FILE* fd = fopen(path, mode);
- if (!fd) {
- char message[1024] = {0};
- serd_system_strerror(errno, message, sizeof(message));
-
- serd_world_errorf(
- world, SERD_ERR_INTERNAL, "failed to open file %s (%s)\n", path, message);
- return NULL;
- }
-
-#if USE_POSIX_FADVISE && USE_FILENO
- posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL);
-#endif
-
- return fd;
-}
-
SerdStatus
serd_world_error(const SerdWorld* const world, const SerdError* const e)
{
diff --git a/src/world.h b/src/world.h
index 44e21166..a70a6e28 100644
--- a/src/world.h
+++ b/src/world.h
@@ -20,7 +20,6 @@
#include "serd/serd.h"
#include <stdint.h>
-#include <stdio.h>
struct SerdWorldImpl {
SerdNodes* nodes;
@@ -37,10 +36,6 @@ struct SerdWorldImpl {
uint32_t next_blank_id;
};
-/// Open a file configured for fast sequential reading
-FILE*
-serd_world_fopen(SerdWorld* world, const char* path, const char* mode);
-
SerdStatus
serd_world_error(const SerdWorld* world, const SerdError* e);