From 8978501e5cf06f366eb14f6ef5f5f7f2f8e34986 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sun, 2 Oct 2022 09:08:22 -0400 Subject: Simplify reader interface --- src/byte_source.c | 15 +++-- src/byte_source.h | 8 +-- src/reader.c | 166 ++++++++++++++++-------------------------------------- src/serdi.c | 89 ++++++++--------------------- src/system.h | 17 ++++++ 5 files changed, 106 insertions(+), 189 deletions(-) (limited to 'src') diff --git a/src/byte_source.c b/src/byte_source.c index 3a2f10b6..d6510eb9 100644 --- a/src/byte_source.c +++ b/src/byte_source.c @@ -35,6 +35,7 @@ SerdStatus serd_byte_source_open_source(SerdByteSource* const source, const SerdReadFunc read_func, const SerdStreamErrorFunc error_func, + const SerdStreamCloseFunc close_func, void* const stream, const char* const name, const size_t page_size) @@ -42,13 +43,14 @@ serd_byte_source_open_source(SerdByteSource* const source, const Cursor cur = {name, 1, 1}; memset(source, '\0', sizeof(*source)); + source->read_func = read_func; + source->error_func = error_func; + source->close_func = close_func; source->stream = stream; - source->from_stream = true; source->page_size = page_size; source->buf_size = page_size; source->cur = cur; - source->error_func = error_func; - source->read_func = read_func; + source->from_stream = true; if (page_size > 1) { source->file_buf = (uint8_t*)serd_allocate_buffer(page_size); @@ -94,10 +96,15 @@ serd_byte_source_open_string(SerdByteSource* const source, SerdStatus serd_byte_source_close(SerdByteSource* const source) { + SerdStatus st = SERD_SUCCESS; + if (source->close_func) { + st = source->close_func(source->stream) ? SERD_BAD_STREAM : SERD_SUCCESS; + } + if (source->page_size > 1) { serd_free_aligned(source->file_buf); } memset(source, '\0', sizeof(*source)); - return SERD_SUCCESS; + return st; } diff --git a/src/byte_source.h b/src/byte_source.h index 5290dca2..e2697bd2 100644 --- a/src/byte_source.h +++ b/src/byte_source.h @@ -12,7 +12,8 @@ #include #include #include -#include + +typedef int (*SerdStreamCloseFunc)(void*); typedef struct { const char* filename; @@ -23,6 +24,7 @@ typedef struct { typedef struct { SerdReadFunc read_func; ///< Read function (e.g. fread) SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror) + SerdStreamCloseFunc close_func; ///< Function for closing stream void* stream; ///< Stream (e.g. FILE) size_t page_size; ///< Number of bytes to read at a time size_t buf_size; ///< Number of bytes in file_buf @@ -36,9 +38,6 @@ typedef struct { bool eof; ///< True iff end of file reached } SerdByteSource; -SerdStatus -serd_byte_source_open_file(SerdByteSource* source, FILE* file, bool bulk); - SerdStatus serd_byte_source_open_string(SerdByteSource* source, const char* utf8); @@ -46,6 +45,7 @@ SerdStatus serd_byte_source_open_source(SerdByteSource* source, SerdReadFunc read_func, SerdStreamErrorFunc error_func, + SerdStreamCloseFunc close_func, void* stream, const char* name, size_t page_size); diff --git a/src/reader.c b/src/reader.c index 284de45d..944c8d09 100644 --- a/src/reader.c +++ b/src/reader.c @@ -5,22 +5,22 @@ #include "byte_source.h" #include "node.h" +#include "serd_internal.h" #include "stack.h" #include "system.h" -#include "serd_internal.h" - -#include "serd/memory.h" #include "serd/stream.h" #include "serd/uri.h" #include #include -#include #include #include #include +static SerdStatus +serd_reader_prepare(SerdReader* reader); + SerdStatus r_err(SerdReader* const reader, const SerdStatus st, const char* const fmt, ...) { @@ -58,25 +58,6 @@ blank_id(SerdReader* const reader) return ref; } -/** fread-like wrapper for getc (which is faster). */ -static size_t -serd_file_read_byte(void* const buf, - const size_t size, - const size_t nmemb, - void* const stream) -{ - (void)size; - (void)nmemb; - - const int c = getc((FILE*)stream); - if (c == EOF) { - *((uint8_t*)buf) = 0; - return 0; - } - *((uint8_t*)buf) = (uint8_t)c; - return 1; -} - Ref push_node_padded(SerdReader* const reader, const size_t maxlen, @@ -154,9 +135,16 @@ emit_statement(SerdReader* const reader, const ReadContext ctx, const Ref o) return st; } -static SerdStatus -read_doc(SerdReader* const reader) +SerdStatus +serd_reader_read_document(SerdReader* const reader) { + if (!reader->source.prepared) { + SerdStatus st = serd_reader_prepare(reader); + if (st) { + return st; + } + } + return ((reader->syntax == SERD_NQUADS) ? read_nquadsDoc(reader) : read_turtleTrigDoc(reader)); } @@ -205,6 +193,7 @@ serd_reader_free(SerdReader* const reader) pop_node(reader, reader->rdf_nil); pop_node(reader, reader->rdf_rest); pop_node(reader, reader->rdf_first); + serd_reader_finish(reader); serd_node_free(reader->default_graph); #ifdef SERD_STACK_CHECK @@ -238,26 +227,6 @@ serd_reader_set_default_graph(SerdReader* const reader, reader->default_graph = serd_node_copy(graph); } -SerdStatus -serd_reader_read_file(SerdReader* const reader, const char* const uri) -{ - char* const path = serd_parse_file_uri(uri, NULL); - if (!path) { - return SERD_BAD_ARG; - } - - FILE* fd = serd_fopen(path, "rb"); - if (!fd) { - serd_free(path); - return SERD_BAD_STREAM; - } - - SerdStatus ret = serd_reader_read_file_handle(reader, fd, path); - fclose(fd); - free(path); - return ret; -} - static SerdStatus skip_bom(SerdReader* const me) { @@ -276,30 +245,45 @@ skip_bom(SerdReader* const me) } SerdStatus -serd_reader_start_stream(SerdReader* const reader, - FILE* const file, - const char* const name, - const bool bulk) +serd_reader_start_stream(SerdReader* const reader, + const SerdReadFunc read_func, + const SerdStreamErrorFunc error_func, + void* const stream, + const char* const name, + const size_t page_size) { - return serd_reader_start_source_stream(reader, - bulk ? (SerdReadFunc)fread - : serd_file_read_byte, - (SerdStreamErrorFunc)ferror, - file, - name, - bulk ? SERD_PAGE_SIZE : 1); + return serd_byte_source_open_source( + &reader->source, read_func, error_func, NULL, stream, name, page_size); } SerdStatus -serd_reader_start_source_stream(SerdReader* const reader, - const SerdReadFunc read_func, - const SerdStreamErrorFunc error_func, - void* const stream, - const char* const name, - const size_t page_size) +serd_reader_start_file(SerdReader* reader, const char* uri, bool bulk) { - return serd_byte_source_open_source( - &reader->source, read_func, error_func, stream, name, page_size); + char* const path = serd_parse_file_uri(uri, NULL); + if (!path) { + return SERD_BAD_ARG; + } + + FILE* fd = serd_fopen(path, "rb"); + free(path); + if (!fd) { + return SERD_BAD_STREAM; + } + + return serd_byte_source_open_source(&reader->source, + bulk ? (SerdReadFunc)fread + : serd_file_read_byte, + (SerdStreamErrorFunc)ferror, + (SerdStreamCloseFunc)fclose, + fd, + uri, + bulk ? SERD_PAGE_SIZE : 1); +} + +SerdStatus +serd_reader_start_string(SerdReader* const reader, const char* const utf8) +{ + return serd_byte_source_open_string(&reader->source, utf8); } static SerdStatus @@ -337,59 +321,7 @@ serd_reader_read_chunk(SerdReader* const reader) } SerdStatus -serd_reader_end_stream(SerdReader* const reader) +serd_reader_finish(SerdReader* const reader) { return serd_byte_source_close(&reader->source); } - -SerdStatus -serd_reader_read_file_handle(SerdReader* const reader, - FILE* const file, - const char* const name) -{ - return serd_reader_read_source(reader, - (SerdReadFunc)fread, - (SerdStreamErrorFunc)ferror, - file, - name, - SERD_PAGE_SIZE); -} - -SerdStatus -serd_reader_read_source(SerdReader* const reader, - const SerdReadFunc source, - const SerdStreamErrorFunc error, - void* const stream, - const char* const name, - const size_t page_size) -{ - SerdStatus st = serd_reader_start_source_stream( - reader, source, error, stream, name, page_size); - - if (st || (st = serd_reader_prepare(reader))) { - serd_reader_end_stream(reader); - return st; - } - - if ((st = read_doc(reader))) { - serd_reader_end_stream(reader); - return st; - } - - return serd_reader_end_stream(reader); -} - -SerdStatus -serd_reader_read_string(SerdReader* const reader, const char* const utf8) -{ - serd_byte_source_open_string(&reader->source, utf8); - - SerdStatus st = serd_reader_prepare(reader); - if (!st) { - st = read_doc(reader); - } - - serd_byte_source_close(&reader->source); - - return st; -} diff --git a/src/serdi.c b/src/serdi.c index 0de64b81..8ed02dfc 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -1,8 +1,8 @@ // Copyright 2011-2023 David Robillard // SPDX-License-Identifier: ISC -#include "serd_config.h" #include "string_utils.h" +#include "system.h" #include "serd/attributes.h" #include "serd/env.h" @@ -13,7 +13,6 @@ #include "serd/stream.h" #include "serd/string_view.h" #include "serd/syntax.h" -#include "serd/uri.h" #include "serd/version.h" #include "serd/writer.h" @@ -25,14 +24,8 @@ # include #endif -#if USE_POSIX_FADVISE && USE_FILENO -# include -#endif - -#include #include #include -#include #include #define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) @@ -137,23 +130,6 @@ quiet_error_sink(void* const handle, const SerdError* const e) return SERD_SUCCESS; } -static FILE* -serd_fopen(const char* const path, const char* const mode) -{ - FILE* fd = fopen(path, mode); - if (!fd) { - SERDI_ERRORF("failed to open file %s (%s)\n", path, strerror(errno)); - return NULL; - } - -#if USE_POSIX_FADVISE && USE_FILENO - (void)posix_fadvise( - fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL | POSIX_FADV_NOREUSE); -#endif - - return fd; -} - static SerdWriterFlags choose_style(const SerdSyntax input_syntax, const SerdSyntax output_syntax, @@ -194,25 +170,23 @@ main(int argc, char** argv) { const char* const prog = argv[0]; - FILE* in_fd = NULL; SerdSyntax input_syntax = (SerdSyntax)0; SerdSyntax output_syntax = (SerdSyntax)0; - bool from_file = true; + bool from_string = false; + bool from_stdin = false; bool ascii = false; bool bulk_read = true; bool bulk_write = false; bool full_uris = false; bool lax = false; bool quiet = false; - const char* in_name = NULL; const char* add_prefix = NULL; const char* chop_prefix = NULL; const char* root_uri = NULL; int a = 1; - for (; a < argc && from_file && argv[a][0] == '-'; ++a) { + for (; a < argc && !from_string && argv[a][0] == '-'; ++a) { if (argv[a][1] == '\0') { - in_name = (const char*)"(stdin)"; - in_fd = stdin; + from_stdin = true; break; } @@ -244,8 +218,7 @@ main(int argc, char** argv) } else if (opt == 'v') { return print_version(); } else if (opt == 's') { - in_name = "(string)"; - from_file = false; + from_string = true; break; } else if (opt == 'c') { if (argv[a][o + 1] || ++a == argc) { @@ -303,22 +276,9 @@ main(int argc, char** argv) _setmode(_fileno(stdout), _O_BINARY); #endif - char* input_path = NULL; - const char* input = (const char*)argv[a++]; - if (from_file) { - in_name = in_name ? in_name : input; - if (!in_fd) { - if (!strncmp(input, "file:", 5)) { - input_path = serd_parse_file_uri(input, NULL); - input = input_path; - } - if (!input || !(in_fd = serd_fopen(input, "rb"))) { - return 1; - } - } - } + const char* input = argv[a++]; - if (!input_syntax && !(input_syntax = guess_syntax(in_name))) { + if (!input_syntax && !(input_syntax = guess_syntax(input))) { input_syntax = SERD_TRIG; } @@ -335,7 +295,7 @@ main(int argc, char** argv) SerdNode* base = NULL; if (a < argc) { // Base URI given on command line base = serd_new_uri(serd_string((const char*)argv[a])); - } else if (from_file && in_fd != stdin) { // Use input file URI + } else if (!from_string && !from_stdin) { // Use input file URI base = serd_new_file_uri(serd_string(input), serd_empty_string()); } @@ -365,30 +325,31 @@ main(int argc, char** argv) serd_reader_add_blank_prefix(reader, add_prefix); SerdStatus st = SERD_SUCCESS; - if (!from_file) { - st = serd_reader_read_string(reader, input); - } else if (bulk_read) { - st = serd_reader_read_file_handle(reader, in_fd, in_name); + if (from_string) { + st = serd_reader_start_string(reader, input); + } else if (from_stdin) { + st = serd_reader_start_stream(reader, + serd_file_read_byte, + (SerdStreamErrorFunc)ferror, + stdin, + "(stdin)", + 1); } else { - st = serd_reader_start_stream(reader, in_fd, in_name, false); - while (!st) { - st = serd_reader_read_chunk(reader); - } - serd_reader_end_stream(reader); + st = serd_reader_start_file(reader, input, bulk_read); } + if (!st) { + st = serd_reader_read_document(reader); + } + + serd_reader_finish(reader); serd_reader_free(reader); serd_writer_finish(writer); serd_writer_free(writer); serd_env_free(env); serd_node_free(base); - free(input_path); - - if (from_file) { - fclose(in_fd); - } - if (fclose(out_fd)) { + if (fclose(stdout)) { perror("serdi: write error"); st = SERD_BAD_STREAM; } diff --git a/src/system.h b/src/system.h index 15f93363..315ea681 100644 --- a/src/system.h +++ b/src/system.h @@ -6,6 +6,7 @@ #include "serd/attributes.h" +#include #include /// Open a file configured for fast sequential reading @@ -28,4 +29,20 @@ serd_allocate_buffer(size_t size); void serd_free_aligned(void* ptr); +/// Wrapper for getc that is compatible with SerdReadFunc +static inline size_t +serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) +{ + (void)size; + (void)nmemb; + + const int c = getc((FILE*)stream); + if (c == EOF) { + *((uint8_t*)buf) = 0; + return 0; + } + *((uint8_t*)buf) = (uint8_t)c; + return 1; +} + #endif // SERD_SRC_SYSTEM_H -- cgit v1.2.1