From 1ae793464e0cfb71dedc9807d97132fdc6145005 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 14 Apr 2017 10:51:46 +0200 Subject: Factor out file reading from reader --- src/byte_source.c | 107 +++++++++++++++++++++++++++++++ src/reader.c | 177 ++++++++++++++++++++++++++++------------------------ src/serd_internal.h | 46 ++++++++++++++ 3 files changed, 247 insertions(+), 83 deletions(-) create mode 100644 src/byte_source.c (limited to 'src') diff --git a/src/byte_source.c b/src/byte_source.c new file mode 100644 index 00000000..e5bb47cf --- /dev/null +++ b/src/byte_source.c @@ -0,0 +1,107 @@ +/* + Copyright 2011-2017 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "serd_internal.h" + /** Fill file_buf with the next page: reset read_head to 0 and read up to SERD_PAGE_SIZE bytes via read_func. If nothing is read, file_buf[0] is zeroed and the result is SERD_ERR_UNKNOWN when error_func reports an error, else SERD_FAILURE. A short read gets a null byte written after the data. */ +static inline SerdStatus +serd_byte_source_page(SerdByteSource* source) +{ + source->read_head = 0; + size_t n_read = source->read_func( + source->file_buf, 1, SERD_PAGE_SIZE, source->stream); + if (n_read == 0) { + source->file_buf[0] = '\0'; + return (source->error_func(source->stream) + ? 
SERD_ERR_UNKNOWN : SERD_FAILURE); + } else if (n_read < SERD_PAGE_SIZE) { + source->file_buf[n_read] = '\0'; + } + return SERD_SUCCESS; +} + /** Zero `source` and configure it to read `stream` through `read_func`. With `bulk`, a zeroed SERD_PAGE_SIZE buffer from serd_bufalloc becomes read_buf; otherwise read_buf points at the single read_byte. Nothing is read here. NOTE(review): the serd_bufalloc result is passed to memset unchecked -- confirm it cannot be NULL. */ +SerdStatus +serd_byte_source_open_source(SerdByteSource* source, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + bool bulk) +{ + memset(source, '\0', sizeof(*source)); + source->stream = stream; + source->from_stream = true; + source->paging = bulk; + source->error_func = error_func; + source->read_func = read_func; + + if (bulk) { + source->file_buf = (uint8_t*)serd_bufalloc(SERD_PAGE_SIZE); + source->read_buf = source->file_buf; + memset(source->file_buf, '\0', SERD_PAGE_SIZE); + } else { + source->read_buf = &source->read_byte; + } + + return SERD_SUCCESS; +} + /** On the first call only, mark the source prepared and load initial data: a page when paging, else one byte when reading from a stream. Later calls return SERD_SUCCESS without reading. */ +SerdStatus +serd_byte_source_prepare(SerdByteSource* source) +{ + if (!source->prepared) { + source->prepared = true; + if (source->paging) { + return serd_byte_source_page(source); + } else if (source->from_stream) { + return serd_byte_source_advance(source); + } + } + return SERD_SUCCESS; +} + /** Zero `source`, point read_buf at `utf8`, and mark it prepared; no stream or buffer is set up. */ +SerdStatus +serd_byte_source_open_string(SerdByteSource* source, const uint8_t* utf8) +{ + memset(source, '\0', sizeof(*source)); + source->read_buf = utf8; + source->prepared = true; + return SERD_SUCCESS; +} + /** Free file_buf if the source was paging, then zero `source`. */ +SerdStatus +serd_byte_source_close(SerdByteSource* source) +{ + if (source->paging) { + free(source->file_buf); + } + memset(source, '\0', sizeof(*source)); + return SERD_SUCCESS; +} + /** Step to the next byte. For an unpaged stream, read one byte into read_byte and return SERD_ERR_UNKNOWN or SERD_FAILURE if none was read; otherwise increment read_head and, when paging and read_head reaches SERD_PAGE_SIZE, load the next page. */ +SerdStatus +serd_byte_source_advance(SerdByteSource* source) +{ + SerdStatus st = SERD_SUCCESS; + if (source->from_stream && !source->paging) { + if (source->read_func(&source->read_byte, 1, 1, source->stream) == 0) { + return (source->error_func(source->stream) + ? 
SERD_ERR_UNKNOWN : SERD_FAILURE); + } + } else if (++source->read_head == SERD_PAGE_SIZE && source->paging) { + st = serd_byte_source_page(source); + } + + return st; +} diff --git a/src/reader.c b/src/reader.c index 11353a13..6b99ebf0 100644 --- a/src/reader.c +++ b/src/reader.c @@ -74,7 +74,7 @@ struct SerdReaderImpl { Ref rdf_rest; Ref rdf_nil; SerdNode default_graph; - FILE* fd; + SerdByteSource source; SerdStack stack; SerdSyntax syntax; unsigned next_id; @@ -83,12 +83,6 @@ struct SerdReaderImpl { uint8_t* buf; uint8_t* bprefix; size_t bprefix_len; - const uint8_t* read_buf; - uint8_t* file_buf; - int32_t read_head; ///< Offset into read_buf - uint8_t read_byte; ///< 1-byte 'buffer' used when not paging - bool from_file; ///< True iff reading from `fd` - bool paging; ///< True iff reading a page at a time bool strict; ///< True iff strict parsing bool eof; bool seen_genid; @@ -123,25 +117,23 @@ r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...) return 0; } -static inline SerdStatus -page(SerdReader* reader) +/** fread-like wrapper for getc (which is faster). */ +static size_t +serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) { - reader->read_head = 0; - size_t n_read = fread(reader->file_buf, 1, SERD_PAGE_SIZE, reader->fd); - if (n_read == 0) { - reader->file_buf[0] = '\0'; - reader->eof = true; - return ferror(reader->fd) ? 
SERD_ERR_UNKNOWN : SERD_FAILURE; - } else if (n_read < SERD_PAGE_SIZE) { - reader->file_buf[n_read] = '\0'; + const int c = getc((FILE*)stream); + if (c == EOF) { + *((uint8_t*)buf) = 0; + return 0; } - return SERD_SUCCESS; + *((uint8_t*)buf) = (uint8_t)c; + return 1; } static inline uint8_t peek_byte(SerdReader* reader) { - return reader->read_buf[reader->read_head]; + return serd_byte_source_peek(&reader->source); } static inline uint8_t @@ -154,19 +146,7 @@ eat_byte_safe(SerdReader* reader, const uint8_t byte) default: ++reader->cur.col; } - if (reader->from_file && !reader->paging) { - const int c = fgetc(reader->fd); - reader->read_byte = (c == EOF) ? 0 : (uint8_t)c; - if (c == EOF) { - reader->eof = true; - } - } else if (++reader->read_head == SERD_PAGE_SIZE && reader->paging) { - const SerdStatus st = page(reader); - if (st > SERD_FAILURE) { - reader->status = st; - r_err(reader, st, "read error: %s\n", strerror(errno)); - } - } + reader->status = serd_byte_source_advance(&reader->source); return byte; } @@ -1827,13 +1807,18 @@ serd_reader_read_file(SerdReader* reader, return ret; } -static void +static bool skip_bom(SerdReader* me) { - const uint8_t* const b = me->read_buf; - if (me->paging && b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) { - me->read_head += 3; + if (peek_byte(me) == 0xEF) { + eat_byte_safe(me, 0xEF); + if (eat_byte_check(me, 0xBB) != 0xBB || + eat_byte_check(me, 0xBF) != 0xBF) { + return r_err(me, SERD_ERR_BAD_SYNTAX, "corrupt byte order mark\n"); + } } + + return true; } SERD_API @@ -1842,44 +1827,57 @@ serd_reader_start_stream(SerdReader* me, FILE* file, const uint8_t* name, bool bulk) +{ + return serd_reader_start_source_stream( + me, + bulk ? 
(SerdSource)fread : serd_file_read_byte, + (SerdStreamErrorFunc)ferror, + file, + name, + bulk); +} + /** Set the cursor to line 1, column 1 of `name` and open the byte source on `stream`; reading is deferred until the source is prepared. */ +SERD_API +SerdStatus +serd_reader_start_source_stream(SerdReader* me, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + const uint8_t* name, + bool bulk) { const Cursor cur = { name, 1, 1 }; - me->fd = file; - me->read_head = 0; - me->cur = cur; - me->from_file = true; - me->eof = false; - me->paging = bulk; - - if (bulk) { - me->file_buf = (uint8_t*)serd_bufalloc(SERD_PAGE_SIZE); - me->read_buf = me->file_buf; - memset(me->file_buf, '\0', SERD_PAGE_SIZE); - SerdStatus st = page(me); - if (st) { - serd_reader_end_stream(me); - return st; - } - skip_bom(me); - } else { - me->read_buf = &me->read_byte; - me->read_byte = 0; // Don't read to avoid potentially blocking - } + me->cur = cur; - return SERD_SUCCESS; + return serd_byte_source_open_source( + &me->source, read_func, error_func, stream, bulk); +} + /** Clear eof and prepare the byte source, reporting a read error if that returns non-zero; otherwise skip any byte order mark. Returns me->status. */ +static SerdStatus +serd_reader_prepare(SerdReader* me) +{ + me->eof = false; + if ((me->status = serd_byte_source_prepare(&me->source))) { + r_err(me, me->status, "read error: %s\n", strerror(errno)); + } else if (!skip_bom(me)) { + me->status = SERD_ERR_BAD_SYNTAX; + } + return me->status; } SERD_API SerdStatus serd_reader_read_chunk(SerdReader* me) { - if (!me->read_byte) { - // Read initial byte - const int c = fgetc(me->fd); - me->read_byte = (c == EOF) ? 0 : (uint8_t)c; - if (c == EOF) { - me->eof = true; - return SERD_FAILURE; + SerdStatus st = SERD_SUCCESS; + if (!me->source.prepared) { + if ((st = serd_reader_prepare(me))) { + return st; + } + } else if (me->eof) { + me->eof = false; + if ((st = serd_byte_source_advance(&me->source))) { + return st; } } return read_statement(me) ? 
SERD_SUCCESS : SERD_FAILURE; @@ -1889,24 +1887,37 @@ SERD_API SerdStatus serd_reader_end_stream(SerdReader* me) { - if (me->paging) { - free(me->file_buf); - } - me->fd = NULL; - me->read_buf = me->file_buf = NULL; - return SERD_SUCCESS; + return serd_byte_source_close(&me->source); } SERD_API SerdStatus serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name) { - SerdStatus st = serd_reader_start_stream(me, file, name, true); - if (!st) { - st = read_doc(me) ? SERD_SUCCESS : SERD_ERR_UNKNOWN; + return serd_reader_read_source( + me, (SerdSource)fread, (SerdStreamErrorFunc)ferror, file, name); +} + /** Read a whole document from `stream` in paged mode and close the byte source on every path. NOTE(review): the status from serd_reader_start_source_stream is overwritten by serd_reader_prepare before it is checked. */ +SERD_API +SerdStatus +serd_reader_read_source(SerdReader* me, + SerdSource source, + SerdStreamErrorFunc error, + void* stream, + const uint8_t* name) +{ + SerdStatus st = serd_reader_start_source_stream( + me, source, error, stream, name, true); + + if ((st = serd_reader_prepare(me))) { serd_reader_end_stream(me); + return st; + } else if (!read_doc(me)) { + serd_reader_end_stream(me); + return SERD_ERR_UNKNOWN; } - return st; + + return serd_reader_end_stream(me); } SERD_API @@ -1915,16 +1926,16 @@ serd_reader_read_string(SerdReader* me, const uint8_t* utf8) { const Cursor cur = { (const uint8_t*)"(string)", 1, 1 }; - me->read_buf = utf8; - me->read_head = 0; + serd_byte_source_open_string(&me->source, utf8); me->cur = cur; - me->from_file = false; - me->paging = false; me->eof = false; - skip_bom(me); - const bool ret = read_doc(me); + SerdStatus st = serd_reader_prepare(me); + if (!st) { + st = read_doc(me) ? SERD_SUCCESS : SERD_ERR_UNKNOWN; + } + + serd_byte_source_close(&me->source); - me->read_buf = NULL; - return ret ? SERD_SUCCESS : SERD_ERR_UNKNOWN; + return st; } diff --git a/src/serd_internal.h b/src/serd_internal.h index b942072c..ef83f03c 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -66,6 +66,52 @@ serd_bufalloc(size_t size) #endif } +/* Byte source */ + /** Reading state for a paged stream, a byte-at-a-time stream, or an in-memory string. */ +typedef struct { + SerdSource read_func; ///< Read function (e.g. 
fread) + SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror) + void* stream; ///< Stream (e.g. FILE) + uint8_t* file_buf; ///< Buffer iff reading pages from a file + const uint8_t* read_buf; ///< Pointer to file_buf or read_byte + size_t read_head; ///< Offset into read_buf + uint8_t read_byte; ///< 1-byte 'buffer' used when not paging + bool from_stream; ///< True iff reading from `stream` + bool paging; ///< True iff reading a page at a time + bool prepared; ///< True iff prepared for reading +} SerdByteSource; + /** NOTE(review): no definition of this function is visible in this patch -- confirm one exists. */ +SerdStatus +serd_byte_source_open_file(SerdByteSource* source, + FILE* file, + bool bulk); + +SerdStatus +serd_byte_source_open_string(SerdByteSource* source, const uint8_t* utf8); + +SerdStatus +serd_byte_source_open_source(SerdByteSource* source, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + bool bulk); + +SerdStatus +serd_byte_source_close(SerdByteSource* source); + +SerdStatus +serd_byte_source_prepare(SerdByteSource* source); + /** Return the byte at read_head without advancing; asserts that the source is prepared. */ +static inline uint8_t +serd_byte_source_peek(SerdByteSource* source) +{ + assert(source->prepared); + return source->read_buf[source->read_head]; +} + +SerdStatus +serd_byte_source_advance(SerdByteSource* source); + /* Stack */ /** A dynamic stack in memory. */ -- cgit v1.2.1