From e6abc21f9bde66d8f60078493354ba3713f9fcd8 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Thu, 25 Feb 2021 15:48:04 -0500 Subject: Simplify reader interface --- include/serd/serd.h | 25 ++++++------ src/byte_source.c | 15 ++++++-- src/byte_source.h | 8 ++-- src/reader.c | 76 ++++++++++++++----------------------- src/serdi.c | 97 +++++++++-------------------------------------- src/system.h | 17 +++++++++ test/test_read_chunk.c | 2 +- test/test_reader_writer.c | 40 +++++++++---------- 8 files changed, 113 insertions(+), 167 deletions(-) diff --git a/include/serd/serd.h b/include/serd/serd.h index 9c205d26..d51adebe 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -954,11 +954,12 @@ void serd_reader_set_default_graph(SerdReader* SERD_NONNULL reader, const SerdNode* SERD_NULLABLE graph); -/// Read a file at a given `uri` +/// Prepare to read from the file at a local file `uri` SERD_API SerdStatus -serd_reader_read_file(SerdReader* SERD_NONNULL reader, - const char* SERD_NONNULL uri); +serd_reader_start_file(SerdReader* SERD_NONNULL reader, + const char* SERD_NONNULL uri, + bool bulk); /** Prepare to read from a stream. @@ -1004,18 +1005,20 @@ SERD_API SerdStatus serd_reader_read_document(SerdReader* SERD_NONNULL reader); -/// Finish reading from the source -SERD_API -SerdStatus -serd_reader_end_stream(SerdReader* SERD_NONNULL reader); +/** + Finish reading from the source. -/// Read `utf8` + This should be called before starting to read from another source. +*/ SERD_API SerdStatus -serd_reader_read_string(SerdReader* SERD_NONNULL reader, - const char* SERD_NONNULL utf8); +serd_reader_finish(SerdReader* SERD_NONNULL reader); -/// Free `reader` +/** + Free `reader`. + + The reader will be finished via `serd_reader_finish()` if necessary. +*/ SERD_API void serd_reader_free(SerdReader* SERD_NULLABLE reader); diff --git a/src/byte_source.c b/src/byte_source.c index 04836984..2f09d634 100644 --- a/src/byte_source.c +++ b/src/byte_source.c @@ -50,6 +50,7 @@ SerdStatus serd_byte_source_open_source(SerdByteSource* source, SerdReadFunc read_func, SerdStreamErrorFunc error_func, + SerdStreamCloseFunc close_func, void* stream, const char* name, size_t page_size) @@ -57,13 +58,14 @@ serd_byte_source_open_source(SerdByteSource* source, const Cursor cur = {name, 1, 1}; memset(source, '\0', sizeof(*source)); + source->read_func = read_func; + source->error_func = error_func; + source->close_func = close_func; source->stream = stream; - source->from_stream = true; source->page_size = page_size; source->buf_size = page_size; source->cur = cur; - source->error_func = error_func; - source->read_func = read_func; + source->from_stream = true; if (page_size > 1) { source->file_buf = (uint8_t*)serd_allocate_buffer(page_size); @@ -103,10 +105,15 @@ serd_byte_source_open_string(SerdByteSource* source, const char* utf8) SerdStatus serd_byte_source_close(SerdByteSource* source) { + SerdStatus st = SERD_SUCCESS; + if (source->close_func) { + st = source->close_func(source->stream) ? SERD_ERR_UNKNOWN : SERD_SUCCESS; + } + if (source->page_size > 1) { serd_free_aligned(source->file_buf); } memset(source, '\0', sizeof(*source)); - return SERD_SUCCESS; + return st; } diff --git a/src/byte_source.h b/src/byte_source.h index f0df9bf3..d2c19de3 100644 --- a/src/byte_source.h +++ b/src/byte_source.h @@ -23,7 +23,8 @@ #include #include #include -#include + +typedef int (*SerdStreamCloseFunc)(void*); typedef struct { const char* filename; @@ -34,6 +35,7 @@ typedef struct { typedef struct { SerdReadFunc read_func; ///< Read function (e.g. fread) SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror) + SerdStreamCloseFunc close_func; ///< Function for closing stream void* stream; ///< Stream (e.g. FILE) size_t page_size; ///< Number of bytes to read at a time size_t buf_size; ///< Number of bytes in file_buf @@ -47,9 +49,6 @@ typedef struct { bool eof; ///< True iff end of file reached } SerdByteSource; -SerdStatus -serd_byte_source_open_file(SerdByteSource* source, FILE* file, bool bulk); - SerdStatus serd_byte_source_open_string(SerdByteSource* source, const char* utf8); @@ -57,6 +56,7 @@ SerdStatus serd_byte_source_open_source(SerdByteSource* source, SerdReadFunc read_func, SerdStreamErrorFunc error_func, + SerdStreamCloseFunc close_func, void* stream, const char* name, size_t page_size); diff --git a/src/reader.c b/src/reader.c index 805d41a7..ab9e54b5 100644 --- a/src/reader.c +++ b/src/reader.c @@ -16,11 +16,10 @@ #include "reader.h" #include "byte_source.h" +#include "serd_internal.h" #include "stack.h" #include "system.h" -#include "serd_internal.h" - #include #include #include @@ -207,6 +206,7 @@ serd_reader_free(SerdReader* reader) pop_node(reader, reader->rdf_nil); pop_node(reader, reader->rdf_rest); pop_node(reader, reader->rdf_first); + serd_reader_finish(reader); serd_node_free(reader->default_graph); #ifdef SERD_STACK_CHECK @@ -239,39 +239,6 @@ serd_reader_set_default_graph(SerdReader* reader, const SerdNode* graph) reader->default_graph = serd_node_copy(graph); } -SerdStatus -serd_reader_read_file(SerdReader* reader, const char* uri) -{ - char* const path = serd_parse_file_uri(uri, NULL); - if (!path) { - return SERD_ERR_BAD_ARG; - } - - FILE* fd = serd_fopen(path, "rb"); - if (!fd) { - serd_free(path); - return SERD_ERR_UNKNOWN; - } - - SerdStatus st = serd_reader_start_stream(reader, - (SerdReadFunc)fread, - (SerdStreamErrorFunc)ferror, - fd, - path, - SERD_PAGE_SIZE); - - if (!st) { - st = serd_reader_read_document(reader); - } - - const SerdStatus est = serd_reader_end_stream(reader); - - fclose(fd); - free(path); - - return st ? st : est; -} - static SerdStatus skip_bom(SerdReader* me) { @@ -298,7 +265,31 @@ serd_reader_start_stream(SerdReader* reader, size_t page_size) { return serd_byte_source_open_source( - &reader->source, read_func, error_func, stream, name, page_size); + &reader->source, read_func, error_func, NULL, stream, name, page_size); +} + +SerdStatus +serd_reader_start_file(SerdReader* reader, const char* uri, bool bulk) +{ + char* const path = serd_parse_file_uri(uri, NULL); + if (!path) { + return SERD_ERR_BAD_ARG; + } + + FILE* fd = serd_fopen(path, "rb"); + free(path); + if (!fd) { + return SERD_ERR_UNKNOWN; + } + + return serd_byte_source_open_source(&reader->source, + bulk ? (SerdReadFunc)fread + : serd_file_read_byte, + (SerdStreamErrorFunc)ferror, + (SerdStreamCloseFunc)fclose, + fd, + uri, + bulk ? SERD_PAGE_SIZE : 1); } SerdStatus @@ -340,18 +331,7 @@ serd_reader_read_chunk(SerdReader* reader) } SerdStatus -serd_reader_end_stream(SerdReader* reader) +serd_reader_finish(SerdReader* reader) { return serd_byte_source_close(&reader->source); } - -SerdStatus -serd_reader_read_string(SerdReader* reader, const char* utf8) -{ - serd_reader_start_string(reader, utf8); - - const SerdStatus st = serd_reader_read_document(reader); - const SerdStatus est = serd_byte_source_close(&reader->source); - - return st ? st : est; -} diff --git a/src/serdi.c b/src/serdi.c index e7b8d7ba..6049c31b 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -14,10 +14,9 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define _POSIX_C_SOURCE 200809L /* for fileno and posix_fadvise */ - #include "serd_config.h" #include "string_utils.h" +#include "system.h" #include "serd/serd.h" @@ -29,15 +28,8 @@ # include #endif -#if USE_POSIX_FADVISE && USE_FILENO -# include -#endif - -#include #include -#include #include -#include #include #define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) @@ -135,38 +127,6 @@ quiet_error_sink(void* handle, const SerdError* e) return SERD_SUCCESS; } -static inline FILE* -serd_fopen(const char* path, const char* mode) -{ - FILE* fd = fopen(path, mode); - if (!fd) { - SERDI_ERRORF("failed to open file %s (%s)\n", path, strerror(errno)); - return NULL; - } - -#if USE_POSIX_FADVISE && USE_FILENO - posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL | POSIX_FADV_NOREUSE); -#endif - - return fd; -} - -/** fread-like wrapper for getc (which is faster). */ -static size_t -serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) -{ - (void)size; - (void)nmemb; - - const int c = getc((FILE*)stream); - if (c == EOF) { - *((uint8_t*)buf) = 0; - return 0; - } - *((uint8_t*)buf) = (uint8_t)c; - return 1; -} - static SerdWriterFlags choose_style(const SerdSyntax input_syntax, const SerdSyntax output_syntax, @@ -204,25 +164,23 @@ main(int argc, char** argv) return print_usage(argv[0], true); } - FILE* in_fd = NULL; SerdSyntax input_syntax = (SerdSyntax)0; SerdSyntax output_syntax = (SerdSyntax)0; - bool from_file = true; + bool from_string = false; + bool from_stdin = false; bool ascii = false; bool bulk_read = true; bool bulk_write = false; bool full_uris = false; bool lax = false; bool quiet = false; - const char* in_name = NULL; const char* add_prefix = NULL; const char* chop_prefix = NULL; const char* root_uri = NULL; int a = 1; for (; a < argc && argv[a][0] == '-'; ++a) { if (argv[a][1] == '\0') { - in_name = (const char*)"(stdin)"; - in_fd = stdin; + from_stdin = true; break; } @@ -243,8 +201,7 @@ main(int argc, char** argv) } else if (argv[a][1] == 'v') { return print_version(); } else if (argv[a][1] == 's') { - in_name = (const char*)"(string)"; - from_file = false; + from_string = true; ++a; break; } else if (argv[a][1] == 'i') { @@ -297,22 +254,9 @@ main(int argc, char** argv) _setmode(_fileno(stdout), _O_BINARY); #endif - char* input_path = NULL; - const char* input = (const char*)argv[a++]; - if (from_file) { - in_name = in_name ? in_name : input; - if (!in_fd) { - if (!strncmp(input, "file:", 5)) { - input_path = serd_parse_file_uri(input, NULL); - input = input_path; - } - if (!input || !(in_fd = serd_fopen(input, "rb"))) { - return 1; - } - } - } + const char* input = argv[a++]; - if (!input_syntax && !(input_syntax = guess_syntax(in_name))) { + if (!input_syntax && !(input_syntax = guess_syntax(input))) { input_syntax = SERD_TRIG; } @@ -329,7 +273,7 @@ main(int argc, char** argv) SerdNode* base = NULL; if (a < argc) { // Base URI given on command line base = serd_new_uri(SERD_MEASURE_STRING((const char*)argv[a])); - } else if (from_file && in_fd != stdin) { // Use input file URI + } else if (!from_string && !from_stdin) { // Use input file URI base = serd_new_file_uri(SERD_MEASURE_STRING(input), SERD_EMPTY_STRING()); } @@ -355,36 +299,31 @@ main(int argc, char** argv) serd_node_free(root); SerdStatus st = SERD_SUCCESS; - if (!from_file) { + if (from_string) { st = serd_reader_start_string(reader, input); - } else { + } else if (from_stdin) { st = serd_reader_start_stream(reader, - bulk_read ? (SerdReadFunc)fread - : serd_file_read_byte, + serd_file_read_byte, (SerdStreamErrorFunc)ferror, - in_fd, - in_name, - bulk_read ? 4096 : 1); + stdin, + "(stdin)", + 1); + } else { + st = serd_reader_start_file(reader, input, bulk_read); } if (!st) { st = serd_reader_read_document(reader); } - serd_reader_end_stream(reader); - + serd_reader_finish(reader); serd_reader_free(reader); serd_writer_finish(writer); serd_writer_free(writer); serd_env_free(env); serd_node_free(base); - free(input_path); - - if (from_file) { - fclose(in_fd); - } - if (fclose(out_fd)) { + if (fclose(stdout)) { perror("serdi: write error"); st = SERD_ERR_UNKNOWN; } diff --git a/src/system.h b/src/system.h index fdfab4a4..df5827de 100644 --- a/src/system.h +++ b/src/system.h @@ -19,6 +19,7 @@ #include "attributes.h" +#include #include /// Open a file configured for fast sequential reading @@ -37,4 +38,20 @@ serd_allocate_buffer(size_t size); void serd_free_aligned(void* ptr); +/** fread-like wrapper for getc (which is faster). */ +static inline size_t +serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) +{ + (void)size; + (void)nmemb; + + const int c = getc((FILE*)stream); + if (c == EOF) { + *((uint8_t*)buf) = 0; + return 0; + } + *((uint8_t*)buf) = (uint8_t)c; + return 1; +} + #endif // SERD_SYSTEM_H diff --git a/test/test_read_chunk.c b/test/test_read_chunk.c index c4e267f3..df8b43c2 100644 --- a/test/test_read_chunk.c +++ b/test/test_read_chunk.c @@ -108,7 +108,7 @@ main(void) assert(serd_reader_read_chunk(reader) == SERD_FAILURE); assert(n_end == 1); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); - assert(!serd_reader_end_stream(reader)); + assert(!serd_reader_finish(reader)); serd_reader_free(reader); serd_sink_free(sink); diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c index 8f80deee..697469a9 100644 --- a/test/test_reader_writer.c +++ b/test/test_reader_writer.c @@ -19,6 +19,7 @@ #include "serd/serd.h" #include +#include #include #include #include @@ -150,13 +151,14 @@ test_read_string(void) serd_sink_set_statement_func(sink, test_sink); // Test reading a string that ends exactly at the end of input (no newline) - const SerdStatus st = - serd_reader_read_string(reader, - " " - " ."); + assert( + !serd_reader_start_string(reader, + " " + " .")); - assert(!st); + assert(!serd_reader_read_document(reader)); assert(rt->n_statements == 1); + assert(!serd_reader_finish(reader)); serd_reader_free(reader); serd_sink_free(sink); @@ -257,12 +259,12 @@ test_writer(const char* const path) static void test_reader(const char* path) { - ReaderTest* rt = (ReaderTest*)calloc(1, sizeof(ReaderTest)); - SerdSink* const sink = serd_sink_new(rt, NULL); + ReaderTest rt = {0, NULL}; + SerdSink* const sink = serd_sink_new(&rt, NULL); SerdReader* reader = serd_reader_new(SERD_TURTLE, sink); - assert(sink); assert(reader); + assert(sink); serd_sink_set_statement_func(sink, test_sink); SerdNode* g = serd_new_uri(SERD_STATIC_STRING("http://example.org/")); @@ -280,17 +282,16 @@ test_reader(const char* path) serd_node_free(g); - assert(serd_reader_read_file(reader, "http://notafile")); - assert(serd_reader_read_file(reader, "file:///better/not/exist")); - assert(serd_reader_read_file(reader, "file://")); + assert(serd_reader_start_file(reader, "http://notafile", false)); + assert(serd_reader_start_file(reader, "file://invalid", false)); + assert(serd_reader_start_file(reader, "file:///nonexistant", false)); - const SerdStatus st = serd_reader_read_file(reader, path); - assert(!st); - assert(rt->n_statements == 6); - assert(rt->graph && serd_node_string(rt->graph) && - !strcmp(serd_node_string(rt->graph), "http://example.org/")); - - assert(serd_reader_read_string(reader, "This isn't Turtle at all.")); + assert(!serd_reader_start_file(reader, path, true)); + assert(!serd_reader_read_document(reader)); + assert(rt.n_statements == 6); + assert(rt.graph && serd_node_string(rt.graph) && + !strcmp(serd_node_string(rt.graph), "http://example.org/")); + serd_reader_finish(reader); // A read of a big page hits EOF then fails to read chunks immediately { @@ -311,7 +312,7 @@ test_reader(const char* path) assert(serd_reader_read_chunk(reader) == SERD_FAILURE); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); - serd_reader_end_stream(reader); + serd_reader_finish(reader); fclose(temp); } @@ -333,7 +334,6 @@ test_reader(const char* path) serd_reader_free(reader); serd_sink_free(sink); - free(rt); } int -- cgit v1.2.1