From ecaf0fc72f53a8d42909aa8d8d8be873500a0bc0 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sat, 10 Feb 2018 13:32:06 +0100 Subject: Simplify reader interface --- serd/serd.h | 54 ++++++++---------------- src/n3.c | 28 +++++++------ src/reader.c | 116 +++++++++++++++++----------------------------------- src/serd_internal.h | 4 +- src/serdi.c | 36 ++++++++++++---- tests/serd_test.c | 14 ++++++- 6 files changed, 113 insertions(+), 139 deletions(-) diff --git a/serd/serd.h b/serd/serd.h index 32dbe21e..89bce9cf 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -847,34 +847,26 @@ serd_reader_read_file(SerdReader* reader, const char* uri); /** - Start an incremental read from a file handle. + Prepare to read from a stream. - Iff `bulk` is true, `file` will be read a page at a time. This is more - efficient, but uses a page of memory and means that an entire page of input - must be ready before any callbacks will fire. To react as soon as input - arrives, set `bulk` to false. + The `read_func` is guaranteed to only be called for `page_size` elements + with size 1 (i.e. `page_size` bytes). */ SERD_API SerdStatus -serd_reader_start_stream(SerdReader* reader, - FILE* file, - const char* name, - bool bulk); +serd_reader_start_stream(SerdReader* reader, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + const char* name, + size_t page_size); /** - Start an incremental read from a user-specified source. - - The `read_func` is guaranteed to only be called for `page_size` elements - with size 1 (i.e. `page_size` bytes). + Prepare to read from a string. */ SERD_API SerdStatus -serd_reader_start_source_stream(SerdReader* reader, - SerdSource read_func, - SerdStreamErrorFunc error_func, - void* stream, - const char* name, - size_t page_size); +serd_reader_start_string(SerdReader* reader, const char* utf8); /** Read a single "chunk" of data during an incremental read. @@ -889,32 +881,22 @@ SerdStatus serd_reader_read_chunk(SerdReader* reader); /** - Finish an incremental read from a file handle. -*/ -SERD_API -SerdStatus -serd_reader_end_stream(SerdReader* reader); + Read a complete document from the source. -/** - Read `file`. + This function will continue pulling from the source until a complete + document has been read. Note that this may block when used with streams, + for incremental reading use serd_reader_read_chunk(). */ SERD_API SerdStatus -serd_reader_read_file_handle(SerdReader* reader, - FILE* file, - const char* name); +serd_reader_read_document(SerdReader* reader); /** - Read a user-specified byte source. + Finish reading from the source. */ SERD_API SerdStatus -serd_reader_read_source(SerdReader* reader, - SerdSource source, - SerdStreamErrorFunc error, - void* stream, - const char* name, - size_t page_size); +serd_reader_end_stream(SerdReader* reader); /** Read `utf8`. diff --git a/src/n3.c b/src/n3.c index 9239f67c..b80660ab 100644 --- a/src/n3.c +++ b/src/n3.c @@ -1444,22 +1444,22 @@ skip_until(SerdReader* reader, uint8_t byte) } } -bool +SerdStatus read_turtleTrigDoc(SerdReader* reader) { while (!reader->source.eof) { if (!read_n3_statement(reader)) { if (reader->strict) { - return 0; + return SERD_ERR_UNKNOWN; } skip_until(reader, '\n'); reader->status = SERD_SUCCESS; } } - return reader->status <= SERD_FAILURE; + return reader->status; } -bool +SerdStatus read_nquadsDoc(SerdReader* reader) { while (!reader->source.eof) { @@ -1471,8 +1471,9 @@ read_nquadsDoc(SerdReader* reader) if (peek_byte(reader) == EOF) { break; } else if (peek_byte(reader) == '@') { - return r_err(reader, SERD_ERR_BAD_SYNTAX, - "syntax does not support directives\n"); + r_err(reader, SERD_ERR_BAD_SYNTAX, + "syntax does not support directives\n"); + return SERD_ERR_BAD_SYNTAX; } // subject predicate object @@ -1481,11 +1482,11 @@ read_nquadsDoc(SerdReader* reader) !(ctx.predicate = read_IRIREF(reader)) || !read_ws_star(reader) || !read_object(reader, &ctx, false, &ate_dot)) { - return false; + return SERD_ERR_UNKNOWN; } if (!ate_dot) { // graphLabel? - TRY_RET(read_ws_star(reader)); + read_ws_star(reader); switch (peek_byte(reader)) { case '.': break; @@ -1494,20 +1495,23 @@ read_nquadsDoc(SerdReader* reader) break; default: if (!(ctx.graph = read_IRIREF(reader))) { - return false; + return SERD_ERR_UNKNOWN; } } // Terminating '.' - TRY_RET(read_ws_star(reader)); + read_ws_star(reader); eat_byte_check(reader, '.'); } - TRY_RET(emit_statement(reader, ctx, ctx.object, ctx.datatype, ctx.lang)); + if (!emit_statement(reader, ctx, ctx.object, ctx.datatype, ctx.lang)) { + break; + } + pop_node(reader, ctx.graph); pop_node(reader, ctx.lang); pop_node(reader, ctx.datatype); pop_node(reader, ctx.object); } - return reader->status <= SERD_FAILURE; + return reader->status; } diff --git a/src/reader.c b/src/reader.c index cbf40232..354618dc 100644 --- a/src/reader.c +++ b/src/reader.c @@ -25,6 +25,8 @@ #include #include +static SerdStatus serd_reader_prepare(SerdReader* reader); + int r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...) { @@ -62,22 +64,6 @@ blank_id(SerdReader* reader) return ref; } -/** fread-like wrapper for getc (which is faster). */ -static size_t -serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) -{ - (void)size; - (void)nmemb; - - const int c = getc((FILE*)stream); - if (c == EOF) { - *((uint8_t*)buf) = 0; - return 0; - } - *((uint8_t*)buf) = (uint8_t)c; - return 1; -} - Ref push_node_padded(SerdReader* reader, size_t maxlen, SerdType type, const char* str, size_t n_bytes) @@ -153,9 +139,16 @@ read_statement(SerdReader* reader) } } -static bool -read_doc(SerdReader* reader) +SerdStatus +serd_reader_read_document(SerdReader* reader) { + if (!reader->source.prepared) { + SerdStatus st = serd_reader_prepare(reader); + if (st) { + return st; + } + } + return ((reader->syntax == SERD_NQUADS) ? read_nquadsDoc(reader) : read_turtleTrigDoc(reader)); } @@ -266,10 +259,20 @@ serd_reader_read_file(SerdReader* reader, return SERD_ERR_UNKNOWN; } - SerdStatus ret = serd_reader_read_file_handle(reader, fd, path); + SerdStatus st = serd_reader_start_stream( + reader, (SerdSource)fread, (SerdStreamErrorFunc)ferror, + fd, path, SERD_PAGE_SIZE); + + if (!st) { + st = serd_reader_read_document(reader); + } + + const SerdStatus est = serd_reader_end_stream(reader); + fclose(fd); free(path); - return ret; + + return st ? st : est; } static SerdStatus @@ -290,30 +293,21 @@ skip_bom(SerdReader* me) } SerdStatus -serd_reader_start_stream(SerdReader* reader, - FILE* file, - const char* name, - bool bulk) +serd_reader_start_stream(SerdReader* reader, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + const char* name, + size_t page_size) { - return serd_reader_start_source_stream( - reader, - bulk ? (SerdSource)fread : serd_file_read_byte, - (SerdStreamErrorFunc)ferror, - file, - name, - bulk ? SERD_PAGE_SIZE : 1); + return serd_byte_source_open_source( + &reader->source, read_func, error_func, stream, name, page_size); } SerdStatus -serd_reader_start_source_stream(SerdReader* reader, - SerdSource read_func, - SerdStreamErrorFunc error_func, - void* stream, - const char* name, - size_t page_size) +serd_reader_start_string(SerdReader* reader, const char* utf8) { - return serd_byte_source_open_source( - &reader->source, read_func, error_func, stream, name, page_size); + return serd_byte_source_open_string(&reader->source, utf8); } static SerdStatus @@ -354,49 +348,13 @@ serd_reader_end_stream(SerdReader* reader) return serd_byte_source_close(&reader->source); } -SerdStatus -serd_reader_read_file_handle(SerdReader* reader, - FILE* file, - const char* name) -{ - return serd_reader_read_source( - reader, (SerdSource)fread, (SerdStreamErrorFunc)ferror, - file, name, SERD_PAGE_SIZE); -} - -SerdStatus -serd_reader_read_source(SerdReader* reader, - SerdSource source, - SerdStreamErrorFunc error, - void* stream, - const char* name, - size_t page_size) -{ - SerdStatus st = serd_reader_start_source_stream( - reader, source, error, stream, name, page_size); - - if (st || (st = serd_reader_prepare(reader))) { - serd_reader_end_stream(reader); - return st; - } else if (!read_doc(reader)) { - serd_reader_end_stream(reader); - return SERD_ERR_UNKNOWN; - } - - return serd_reader_end_stream(reader); -} - SerdStatus serd_reader_read_string(SerdReader* reader, const char* utf8) { - serd_byte_source_open_string(&reader->source, utf8); - - SerdStatus st = serd_reader_prepare(reader); - if (!st) { - st = read_doc(reader) ? SERD_SUCCESS : SERD_ERR_UNKNOWN; - } + serd_reader_start_string(reader, utf8); - serd_byte_source_close(&reader->source); + const SerdStatus st = serd_reader_read_document(reader); + const SerdStatus est = serd_byte_source_close(&reader->source); - return st; + return st ? st : est; } diff --git a/src/serd_internal.h b/src/serd_internal.h index 7d06c2e1..fd5aca5e 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -633,8 +633,8 @@ Ref pop_node(SerdReader* reader, Ref ref); bool emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l); bool read_n3_statement(SerdReader* reader); -bool read_nquadsDoc(SerdReader* reader); -bool read_turtleTrigDoc(SerdReader* reader); +SerdStatus read_nquadsDoc(SerdReader* reader); +SerdStatus read_turtleTrigDoc(SerdReader* reader); typedef enum { FIELD_NONE, diff --git a/src/serdi.c b/src/serdi.c index a631aebf..98ef99c8 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -120,6 +120,22 @@ quiet_error_sink(void* handle, const SerdError* e) return SERD_SUCCESS; } +/** fread-like wrapper for getc (which is faster). */ +static size_t +serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) +{ + (void)size; + (void)nmemb; + + const int c = getc((FILE*)stream); + if (c == EOF) { + *((uint8_t*)buf) = 0; + return 0; + } + *((uint8_t*)buf) = (uint8_t)c; + return 1; +} + int main(int argc, char** argv) { @@ -294,17 +310,21 @@ main(int argc, char** argv) SerdStatus status = SERD_SUCCESS; if (!from_file) { - status = serd_reader_read_string(reader, input); - } else if (bulk_read) { - status = serd_reader_read_file_handle(reader, in_fd, in_name); + status = serd_reader_start_string(reader, input); } else { - status = serd_reader_start_stream(reader, in_fd, in_name, false); - while (!status) { - status = serd_reader_read_chunk(reader); - } - serd_reader_end_stream(reader); + status = serd_reader_start_stream( + reader, + bulk_read ? (SerdSource)fread : serd_file_read_byte, + (SerdStreamErrorFunc)ferror, + in_fd, + in_name, + bulk_read ? SERD_PAGE_SIZE : 1); } + status = serd_reader_read_document(reader); + + serd_reader_end_stream(reader); + serd_reader_free(reader); serd_writer_finish(writer); serd_writer_free(writer); diff --git a/tests/serd_test.c b/tests/serd_test.c index c15f45d3..9b3cc2d8 100644 --- a/tests/serd_test.c +++ b/tests/serd_test.c @@ -122,7 +122,12 @@ test_read_chunks(void) assert(serd_reader_get_handle(reader) == rt); assert(f); - SerdStatus st = serd_reader_start_stream(reader, f, NULL, false); + SerdStatus st = serd_reader_start_stream(reader, + (SerdSource)fread, + (SerdStreamErrorFunc)ferror, + f, + NULL, + 1); assert(st == SERD_SUCCESS); // Write two statement separated by null characters @@ -158,11 +163,16 @@ test_read_chunks(void) assert(st == SERD_SUCCESS); // FIXME: return SERD_FAILURE? assert(rt->n_statements == 2); - // EOF + // FIXME: Successful read of nothing st = serd_reader_read_chunk(reader); assert(st == SERD_SUCCESS); // FIXME: return SERD_FAILURE? assert(rt->n_statements == 2); + // EOF + st = serd_reader_read_chunk(reader); + assert(st == SERD_FAILURE); + assert(rt->n_statements == 2); + serd_reader_free(reader); fclose(f); } -- cgit v1.2.1