diff options
author | David Robillard <d@drobilla.net> | 2020-06-28 23:26:48 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2021-03-08 23:23:06 -0500 |
commit | b7948f8c9ad54c30e2579fd5da4626c6f3de325a (patch) | |
tree | 9de00308f9c0d5aa0c3587ac9f4eab7724e71484 /src | |
parent | 4e7e642d0d7b6dfa704f5ae95475854bb8c9b0b2 (diff) | |
download | serd-b7948f8c9ad54c30e2579fd5da4626c6f3de325a.tar.gz serd-b7948f8c9ad54c30e2579fd5da4626c6f3de325a.tar.bz2 serd-b7948f8c9ad54c30e2579fd5da4626c6f3de325a.zip |
WIP: Make Reader always read from a ByteSource
Diffstat (limited to 'src')
-rw-r--r-- | src/byte_source.c | 187 | ||||
-rw-r--r-- | src/byte_source.h | 59 | ||||
-rw-r--r-- | src/n3.c | 8 | ||||
-rw-r--r-- | src/reader.c | 87 | ||||
-rw-r--r-- | src/reader.h | 6 | ||||
-rw-r--r-- | src/serdi.c | 33 | ||||
-rw-r--r-- | src/string.c | 2 | ||||
-rw-r--r-- | src/world.c | 23 | ||||
-rw-r--r-- | src/world.h | 4 |
9 files changed, 200 insertions, 209 deletions
diff --git a/src/byte_source.c b/src/byte_source.c index ca02ff3e..1c86fcce 100644 --- a/src/byte_source.c +++ b/src/byte_source.c @@ -20,118 +20,167 @@ #include "serd/serd.h" +#if defined(USE_POSIX_FADVISE) && defined(USE_FILENO) +# include <fcntl.h> +#endif + #include <stdbool.h> #include <stdint.h> +#include <stdio.h> +#include <stdlib.h> #include <string.h> -SerdStatus -serd_byte_source_page(SerdByteSource* source) +SerdByteSource* +serd_byte_source_new_string(const char* string, const SerdNode* name) { - uint8_t* const buf = - (source->page_size > 1 ? source->file_buf : &source->read_byte); + SerdByteSource* source = (SerdByteSource*)calloc(1, sizeof(SerdByteSource)); - const size_t n_read = - source->read_func(buf, 1, source->page_size, source->stream); + source->page_size = 1; + source->read_buf = (const uint8_t*)string; + source->type = FROM_STRING; - source->buf_size = n_read; - source->read_head = 0; - source->eof = false; + source->name = + name ? serd_node_copy(name) : serd_new_string(SERD_STATIC_STRING("string")); - if (n_read < source->page_size) { - buf[n_read] = '\0'; - if (n_read == 0) { - source->eof = true; - return (source->error_func(source->stream) ? SERD_ERR_UNKNOWN - : SERD_FAILURE); - } - } + const SerdCursor cur = {source->name, 1, 1}; + source->cur = cur; - return SERD_SUCCESS; + return source; } -SerdStatus -serd_byte_source_open_source(SerdByteSource* source, - SerdReadFunc read_func, - SerdStreamErrorFunc error_func, - SerdStreamCloseFunc close_func, - void* stream, - const SerdNode* name, - size_t page_size) +SerdByteSource* +serd_byte_source_new_filename(const char* path, size_t block_size) { - assert(page_size > 0); - memset(source, '\0', sizeof(*source)); - source->read_func = read_func; - source->error_func = error_func; - source->close_func = close_func; - source->stream = stream; - source->page_size = page_size; - source->buf_size = page_size; - source->name = serd_node_copy(name); - source->from_stream = true; + if (!path || !block_size) { + return NULL; + } + + FILE* fd = fopen(path, "rb"); + if (!fd) { + return NULL; + } + + SerdByteSource* source = (SerdByteSource*)calloc(1, sizeof(SerdByteSource)); + + source->read_func = (SerdReadFunc)fread; + source->error_func = (SerdStreamErrorFunc)ferror; + source->close_func = (SerdStreamCloseFunc)fclose; + source->stream = fd; + source->page_size = block_size; + source->buf_size = block_size; + source->name = + serd_new_file_uri(SERD_MEASURE_STRING(path), SERD_EMPTY_STRING()); + source->type = FROM_FILENAME; const SerdCursor cur = {source->name, 1, 1}; source->cur = cur; - if (page_size > 1) { - source->file_buf = (uint8_t*)serd_allocate_buffer(page_size); + if (block_size > 1) { + source->file_buf = (uint8_t*)serd_allocate_buffer(block_size); source->read_buf = source->file_buf; - memset(source->file_buf, '\0', page_size); + memset(source->file_buf, '\0', block_size); } else { source->read_buf = &source->read_byte; } - return SERD_SUCCESS; +#if defined(USE_POSIX_FADVISE) && defined(USE_FILENO) + posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + + return source; } -SerdStatus -serd_byte_source_prepare(SerdByteSource* source) +SerdByteSource* +serd_byte_source_new_function(SerdReadFunc read_func, + SerdStreamErrorFunc error_func, + void* stream, + const SerdNode* name, + size_t block_size) { - if (source->page_size == 0) { - return SERD_FAILURE; + if (!read_func || !block_size) { + return NULL; } - source->prepared = true; + SerdByteSource* source = (SerdByteSource*)calloc(1, sizeof(SerdByteSource)); + + source->read_func = read_func; + source->error_func = error_func; + // source->close_func = close_func; FIXME + source->stream = stream; + source->page_size = block_size; + source->buf_size = block_size; + source->type = FROM_FUNCTION; + + source->name = + name ? serd_node_copy(name) : serd_new_string(SERD_STATIC_STRING("func")); + + const SerdCursor cur = {source->name, 1, 1}; + source->cur = cur; - if (source->from_stream) { - return (source->page_size > 1 ? serd_byte_source_page(source) - : serd_byte_source_advance(source)); + if (block_size > 1) { + source->file_buf = (uint8_t*)serd_allocate_buffer(block_size); + source->read_buf = source->file_buf; + memset(source->file_buf, '\0', block_size); + } else { + source->read_buf = &source->read_byte; } - return SERD_SUCCESS; + return source; +} + +void +serd_byte_source_free(SerdByteSource* source) +{ + if (source) { + if (source->close_func) { + source->close_func(source->stream); + } + + if (source->page_size > 1) { + free(source->file_buf); + } + + serd_node_free(source->name); + free(source); + } } SerdStatus -serd_byte_source_open_string(SerdByteSource* source, - const char* utf8, - const SerdNode* name) +serd_byte_source_page(SerdByteSource* source) { - memset(source, '\0', sizeof(*source)); + uint8_t* const buf = + (source->page_size > 1 ? source->file_buf : &source->read_byte); - source->page_size = 1; - source->name = - name ? serd_node_copy(name) : serd_new_string(SERD_STATIC_STRING("string")); + const size_t n_read = + source->read_func(buf, 1, source->page_size, source->stream); - source->read_buf = (const uint8_t*)utf8; + source->buf_size = n_read; + source->read_head = 0; + source->eof = false; - const SerdCursor cur = {source->name, 1, 1}; - source->cur = cur; + if (n_read < source->page_size) { + buf[n_read] = '\0'; + if (n_read == 0) { + source->eof = true; + return (source->error_func(source->stream) ? SERD_ERR_UNKNOWN + : SERD_FAILURE); + } + } return SERD_SUCCESS; } SerdStatus -serd_byte_source_close(SerdByteSource* source) +serd_byte_source_prepare(SerdByteSource* source) { - SerdStatus st = SERD_SUCCESS; - if (source->close_func) { - st = source->close_func(source->stream) ? SERD_ERR_UNKNOWN : SERD_SUCCESS; - } + source->prepared = true; + if (source->type != FROM_STRING) { + if (source->page_size > 1) { + return serd_byte_source_page(source); + } - if (source->page_size > 1) { - serd_free_aligned(source->file_buf); + return serd_byte_source_advance(source); } - serd_node_free(source->name); - memset(source, '\0', sizeof(*source)); - return st; + return SERD_SUCCESS; } diff --git a/src/byte_source.h b/src/byte_source.h index ff281435..a420808f 100644 --- a/src/byte_source.h +++ b/src/byte_source.h @@ -26,42 +26,31 @@ #include <stddef.h> #include <stdint.h> -typedef int (*SerdStreamCloseFunc)(void*); - -typedef struct { - SerdReadFunc read_func; ///< Read function (e.g. fread) - SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror) - SerdStreamCloseFunc close_func; ///< Function for closing stream - void* stream; ///< Stream (e.g. FILE) - size_t page_size; ///< Number of bytes to read at a time - size_t buf_size; ///< Number of bytes in file_buf - SerdNode* name; ///< Name of stream (referenced by cur) - SerdCursor cur; ///< Cursor for error reporting - uint8_t* file_buf; ///< Buffer iff reading pages from a file - const uint8_t* read_buf; ///< Pointer to file_buf or read_byte - size_t read_head; ///< Offset into read_buf - uint8_t read_byte; ///< 1-byte 'buffer' used when not paging - bool from_stream; ///< True iff reading from `stream` - bool prepared; ///< True iff prepared for reading - bool eof; ///< True iff end of file reached -} SerdByteSource; - -SerdStatus -serd_byte_source_open_string(SerdByteSource* source, - const char* utf8, - const SerdNode* name); +typedef enum { + FROM_STRING, ///< Reading from a user-provided buffer + FROM_FILENAME, ///< Reading from a file we opened + FROM_FUNCTION, ///< Reading from a user-provided function +} SerdByteSourceType; -SerdStatus -serd_byte_source_open_source(SerdByteSource* source, - SerdReadFunc read_func, - SerdStreamErrorFunc error_func, - SerdStreamCloseFunc close_func, - void* stream, - const SerdNode* name, - size_t page_size); +typedef int (*SerdStreamCloseFunc)(void*); -SerdStatus -serd_byte_source_close(SerdByteSource* source); +struct SerdByteSourceImpl { + SerdReadFunc read_func; ///< Read function (e.g. fread) + SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror) + SerdStreamCloseFunc close_func; ///< Function for closing stream + void* stream; ///< Stream (e.g. FILE) + size_t page_size; ///< Number of bytes to read at a time + size_t buf_size; ///< Number of bytes in file_buf + SerdNode* name; ///< Name of stream (referenced by cur) + SerdCursor cur; ///< Cursor for error reporting + uint8_t* file_buf; ///< Buffer iff reading pages from a file + const uint8_t* read_buf; ///< Pointer to file_buf or read_byte + size_t read_head; ///< Offset into read_buf + SerdByteSourceType type; ///< Type of input + uint8_t read_byte; ///< 1-byte 'buffer' used when not paging + bool prepared; ///< True iff prepared for reading + bool eof; ///< True iff end of file reached +}; SerdStatus serd_byte_source_prepare(SerdByteSource* source); @@ -91,7 +80,7 @@ serd_byte_source_advance(SerdByteSource* source) ++source->cur.col; } - if (source->from_stream) { + if (source->type != FROM_STRING) { if (++source->read_head >= source->buf_size) { st = serd_byte_source_page(source); } @@ -1097,7 +1097,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) static const size_t XSD_BOOLEAN_LEN = 40; const size_t orig_stack_size = reader->stack.size; - SerdCursor orig_cursor = reader->source.cur; + SerdCursor orig_cursor = reader->source->cur; SerdStatus ret = SERD_FAILURE; bool simple = (ctx->subject != 0); @@ -1634,11 +1634,11 @@ skip_until(SerdReader* reader, uint8_t byte) SerdStatus read_turtleTrigDoc(SerdReader* reader) { - while (!reader->source.eof) { + while (!reader->source->eof) { const size_t orig_stack_size = reader->stack.size; const SerdStatus st = read_n3_statement(reader); if (st > SERD_FAILURE) { - if (reader->strict || reader->source.eof || st == SERD_ERR_OVERFLOW) { + if (reader->strict || reader->source->eof || st == SERD_ERR_OVERFLOW) { serd_stack_pop_to(&reader->stack, orig_stack_size); return st; } @@ -1653,7 +1653,7 @@ SerdStatus read_nquadsDoc(SerdReader* reader) { SerdStatus st = SERD_SUCCESS; - while (!st && !reader->source.eof) { + while (!st && !reader->source->eof) { const size_t orig_stack_size = reader->stack.size; SerdStatementFlags flags = 0; diff --git a/src/reader.c b/src/reader.c index b0b35387..1c9469af 100644 --- a/src/reader.c +++ b/src/reader.c @@ -19,7 +19,6 @@ #include "namespaces.h" #include "stack.h" #include "statement.h" -#include "system.h" #include "world.h" #include <errno.h> @@ -36,7 +35,7 @@ r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...) { va_list args; va_start(args, fmt); - const SerdError e = {st, &reader->source.cur, fmt, &args}; + const SerdError e = {st, &reader->source->cur, fmt, &args}; serd_world_error(reader->world, &e); va_end(args); return st; @@ -123,7 +122,7 @@ emit_statement(SerdReader* reader, ReadContext ctx, SerdNode* o) serd_node_zero_pad(o); const SerdStatement statement = {{ctx.subject, ctx.predicate, o, ctx.graph}, - &reader->source.cur}; + &reader->source->cur}; const SerdStatus st = serd_sink_write_statement(reader->sink, *ctx.flags, &statement); @@ -141,7 +140,11 @@ read_statement(SerdReader* reader) SerdStatus serd_reader_read_document(SerdReader* reader) { - if (!reader->source.prepared) { + if (!reader->source) { + return SERD_ERR_BAD_CALL; + } + + if (!reader->source->prepared) { SerdStatus st = serd_reader_prepare(reader); if (st) { return st; @@ -221,12 +224,12 @@ serd_reader_add_blank_prefix(SerdReader* reader, const char* prefix) static SerdStatus skip_bom(SerdReader* me) { - if (serd_byte_source_peek(&me->source) == 0xEF) { - serd_byte_source_advance(&me->source); - if (serd_byte_source_peek(&me->source) != 0xBB || - serd_byte_source_advance(&me->source) || - serd_byte_source_peek(&me->source) != 0xBF || - serd_byte_source_advance(&me->source)) { + if (serd_byte_source_peek(me->source) == 0xEF) { + serd_byte_source_advance(me->source); + if (serd_byte_source_peek(me->source) != 0xBB || + serd_byte_source_advance(me->source) || + serd_byte_source_peek(me->source) != 0xBF || + serd_byte_source_advance(me->source)) { r_err(me, SERD_ERR_BAD_SYNTAX, "corrupt byte order mark\n"); return SERD_ERR_BAD_SYNTAX; } @@ -236,64 +239,23 @@ skip_bom(SerdReader* me) } SerdStatus -serd_reader_start_stream(SerdReader* reader, - SerdReadFunc read_func, - SerdStreamErrorFunc error_func, - void* stream, - const SerdNode* name, - size_t page_size) -{ - serd_reader_finish(reader); - return serd_byte_source_open_source( - &reader->source, read_func, error_func, NULL, stream, name, page_size); -} - -SerdStatus -serd_reader_start_file(SerdReader* reader, const char* uri, bool bulk) +serd_reader_start(SerdReader* reader, SerdByteSource* byte_source) { serd_reader_finish(reader); - char* const path = serd_parse_file_uri(uri, NULL); - if (!path) { - return SERD_ERR_BAD_ARG; - } - - FILE* fd = serd_world_fopen(reader->world, path, "rb"); - free(path); - if (!fd) { - return SERD_ERR_UNKNOWN; - } + reader->source = byte_source; - SerdNode* const name = serd_new_uri(SERD_MEASURE_STRING(uri)); - const SerdStatus st = serd_byte_source_open_source( - &reader->source, - bulk ? (SerdReadFunc)fread : serd_file_read_byte, - (SerdStreamErrorFunc)ferror, - (SerdStreamCloseFunc)fclose, - fd, - name, - bulk ? SERD_PAGE_SIZE : 1u); - serd_node_free(name); - return st; -} - -SerdStatus -serd_reader_start_string(SerdReader* reader, - const char* utf8, - const SerdNode* name) -{ - serd_reader_finish(reader); - return serd_byte_source_open_string(&reader->source, utf8, name); + return reader->source ? SERD_SUCCESS : SERD_ERR_BAD_ARG; } static SerdStatus serd_reader_prepare(SerdReader* reader) { - SerdStatus st = serd_byte_source_prepare(&reader->source); + SerdStatus st = serd_byte_source_prepare(reader->source); if (st == SERD_SUCCESS) { st = skip_bom(reader); } else if (st == SERD_FAILURE) { - reader->source.eof = true; + reader->source->eof = true; } else { r_err(reader, st, "error preparing read: %s\n", strerror(errno)); } @@ -304,10 +266,14 @@ SerdStatus serd_reader_read_chunk(SerdReader* reader) { SerdStatus st = SERD_SUCCESS; - if (!reader->source.prepared) { + if (!reader->source) { + return SERD_ERR_BAD_CALL; + } + + if (!reader->source->prepared) { st = serd_reader_prepare(reader); - } else if (reader->source.eof) { - st = serd_byte_source_advance(&reader->source); + } else if (reader->source->eof) { + st = serd_byte_source_advance(reader->source); } if (peek_byte(reader) == 0) { @@ -321,5 +287,6 @@ serd_reader_read_chunk(SerdReader* reader) SerdStatus serd_reader_finish(SerdReader* reader) { - return serd_byte_source_close(&reader->source); + reader->source = NULL; + return SERD_SUCCESS; } diff --git a/src/reader.h b/src/reader.h index 7865fb39..62feeee6 100644 --- a/src/reader.h +++ b/src/reader.h @@ -57,7 +57,7 @@ struct SerdReaderImpl { SerdNode* rdf_first; SerdNode* rdf_rest; SerdNode* rdf_nil; - SerdByteSource source; + SerdByteSource* source; SerdStack stack; SerdSyntax syntax; unsigned next_id; @@ -114,7 +114,7 @@ read_turtleTrigDoc(SerdReader* reader); static inline int peek_byte(SerdReader* reader) { - SerdByteSource* source = &reader->source; + SerdByteSource* source = reader->source; return source->eof ? EOF : (int)source->read_buf[source->read_head]; } @@ -127,7 +127,7 @@ eat_byte_safe(SerdReader* reader, const int byte) const int c = peek_byte(reader); assert(c == byte); - serd_byte_source_advance(&reader->source); + serd_byte_source_advance(reader->source); return c; } diff --git a/src/serdi.c b/src/serdi.c index e8ef9897..1925cd88 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -103,25 +103,34 @@ read_file(SerdWorld* const world, syntax = syntax ? syntax : serd_guess_syntax(filename); syntax = syntax ? syntax : SERD_TRIG; - SerdStatus st = SERD_SUCCESS; - SerdReader* reader = serd_reader_new(world, syntax, flags, sink, stack_size); - - serd_reader_add_blank_prefix(reader, add_prefix); - + SerdByteSource* byte_source = NULL; if (!strcmp(filename, "-")) { SerdNode* name = serd_new_string(SERD_STATIC_STRING("stdin")); - st = serd_reader_start_stream( - reader, serd_file_read_byte, (SerdStreamErrorFunc)ferror, stdin, name, 1); + byte_source = serd_byte_source_new_function( + serd_file_read_byte, (SerdStreamErrorFunc)ferror, stdin, name, 1); serd_node_free(name); } else { - st = serd_reader_start_file(reader, filename, bulk_read); + byte_source = + serd_byte_source_new_filename(filename, bulk_read ? SERD_PAGE_SIZE : 1u); } + if (!byte_source) { + SERDI_ERRORF("failed to open input file `%s'\n", filename); + return SERD_ERR_UNKNOWN; + } + + SerdReader* reader = serd_reader_new(world, syntax, flags, sink, stack_size); + + serd_reader_add_blank_prefix(reader, add_prefix); + + SerdStatus st = serd_reader_start(reader, byte_source); + st = st ? st : serd_reader_read_document(reader); serd_reader_free(reader); + serd_byte_source_free(byte_source); return st; } @@ -307,6 +316,9 @@ main(int argc, char** argv) SerdStatus st = SERD_SUCCESS; SerdNode* input_name = NULL; if (input_string) { + SerdByteSource* const byte_source = + serd_byte_source_new_string(input_string, NULL); + SerdReader* const reader = serd_reader_new(world, input_syntax ? input_syntax : SERD_TRIG, @@ -316,13 +328,12 @@ main(int argc, char** argv) serd_reader_add_blank_prefix(reader, add_prefix); - SerdNode* name = serd_new_string(SERD_STATIC_STRING("string")); - if (!(st = serd_reader_start_string(reader, input_string, name))) { + if (!(st = serd_reader_start(reader, byte_source))) { st = serd_reader_read_document(reader); } - serd_node_free(name); serd_reader_free(reader); + serd_byte_source_free(byte_source); } size_t prefix_len = 0; diff --git a/src/string.c b/src/string.c index f0aca005..b5904c84 100644 --- a/src/string.c +++ b/src/string.c @@ -62,6 +62,8 @@ serd_strerror(SerdStatus status) return "Invalid text encoding"; case SERD_ERR_BAD_WRITE: return "Error writing to file"; + case SERD_ERR_BAD_CALL: + return "Invalid call"; default: break; } diff --git a/src/world.c b/src/world.c index 2cbee1a6..0e8df944 100644 --- a/src/world.c +++ b/src/world.c @@ -21,11 +21,6 @@ #include "cursor.h" #include "namespaces.h" #include "node.h" -#include "serd_config.h" - -#if defined(USE_POSIX_FADVISE) -# include <fcntl.h> -#endif #include <errno.h> #include <stdarg.h> @@ -35,24 +30,6 @@ #define BLANK_CHARS 12 -FILE* -serd_world_fopen(SerdWorld* world, const char* path, const char* mode) -{ - FILE* fd = fopen(path, mode); - if (!fd) { - serd_world_errorf(world, - SERD_ERR_INTERNAL, - "failed to open file %s (%s)\n", - path, - strerror(errno)); - return NULL; - } -#if defined(USE_POSIX_FADVISE) && defined(USE_FILENO) - posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL); -#endif - return fd; -} - SerdStatus serd_world_error(const SerdWorld* world, const SerdError* e) { diff --git a/src/world.h b/src/world.h index 44e21166..4f4101e4 100644 --- a/src/world.h +++ b/src/world.h @@ -37,10 +37,6 @@ struct SerdWorldImpl { uint32_t next_blank_id; }; -/// Open a file configured for fast sequential reading -FILE* -serd_world_fopen(SerdWorld* world, const char* path, const char* mode); - SerdStatus serd_world_error(const SerdWorld* world, const SerdError* e); |