diff options
author | David Robillard <d@drobilla.net> | 2020-08-16 12:42:58 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2021-03-07 15:32:24 -0500 |
commit | 9a8e06aa5bdc62ed589bd8ed5789bd059cec0700 (patch) | |
tree | ba5263262fad84c5f58bd60f3c6f6fcba30d6516 | |
parent | d4ae57afbd7da668dbf89f7b7e66e2f064437a98 (diff) | |
download | serd-9a8e06aa5bdc62ed589bd8ed5789bd059cec0700.tar.gz serd-9a8e06aa5bdc62ed589bd8ed5789bd059cec0700.tar.bz2 serd-9a8e06aa5bdc62ed589bd8ed5789bd059cec0700.zip |
Simplify reader interface
-rw-r--r-- | include/serd/serd.h | 60 | ||||
-rw-r--r-- | src/n3.c | 3 | ||||
-rw-r--r-- | src/reader.c | 124 | ||||
-rw-r--r-- | src/serdi.c | 39 | ||||
-rw-r--r-- | test/test_read_chunk.c | 38 | ||||
-rw-r--r-- | test/test_reader_writer.c | 19 |
6 files changed, 121 insertions, 162 deletions
diff --git a/include/serd/serd.h b/include/serd/serd.h index c2c9d52d..000a8ef8 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -23,7 +23,6 @@ #include <stdbool.h> #include <stddef.h> #include <stdint.h> -#include <stdio.h> #if defined(_WIN32) && !defined(SERD_STATIC) && defined(SERD_INTERNAL) # define SERD_API __declspec(dllexport) @@ -877,34 +876,25 @@ serd_reader_read_file(SerdReader* SERD_NONNULL reader, const char* SERD_NONNULL uri); /** - Start an incremental read from a file handle. - - Iff `bulk` is true, `file` will be read a page at a time. This is more - efficient, but uses a page of memory and means that an entire page of input - must be ready before any callbacks will fire. To react as soon as input - arrives, set `bulk` to false. -*/ -SERD_API -SerdStatus -serd_reader_start_stream(SerdReader* SERD_NONNULL reader, - FILE* SERD_NONNULL file, - const char* SERD_NULLABLE name, - bool bulk); - -/** - Start an incremental read from a user-specified source. + Prepare to read from a stream. The `read_func` is guaranteed to only be called for `page_size` elements with size 1 (i.e. `page_size` bytes). */ SERD_API SerdStatus -serd_reader_start_source_stream(SerdReader* SERD_NONNULL reader, - SerdSource SERD_NONNULL read_func, - SerdStreamErrorFunc SERD_NONNULL error_func, - void* SERD_NONNULL stream, - const char* SERD_NULLABLE name, - size_t page_size); +serd_reader_start_stream(SerdReader* SERD_NONNULL reader, + SerdSource SERD_NONNULL read_func, + SerdStreamErrorFunc SERD_NONNULL error_func, + void* SERD_NONNULL stream, + const char* SERD_NULLABLE name, + size_t page_size); + +/// Prepare to read from a string +SERD_API +SerdStatus +serd_reader_start_string(SerdReader* SERD_NONNULL reader, + const char* SERD_NONNULL utf8); /** Read a single "chunk" of data during an incremental read @@ -918,27 +908,21 @@ SERD_API SerdStatus serd_reader_read_chunk(SerdReader* SERD_NONNULL reader); -/// Finish an incremental read from a file handle -SERD_API -SerdStatus -serd_reader_end_stream(SerdReader* SERD_NONNULL reader); +/** + Read a complete document from the source. -/// Read `file` + This function will continue pulling from the source until a complete + document has been read. Note that this may block when used with streams, + for incremental reading use serd_reader_read_chunk(). +*/ SERD_API SerdStatus -serd_reader_read_file_handle(SerdReader* SERD_NONNULL reader, - FILE* SERD_NONNULL file, - const char* SERD_NULLABLE name); +serd_reader_read_document(SerdReader* SERD_NONNULL reader); -/// Read a user-specified byte source +/// Finish reading from the source SERD_API SerdStatus -serd_reader_read_source(SerdReader* SERD_NONNULL reader, - SerdSource SERD_NONNULL source, - SerdStreamErrorFunc SERD_NONNULL error, - void* SERD_NONNULL stream, - const char* SERD_NULLABLE name, - size_t page_size); +serd_reader_end_stream(SerdReader* SERD_NONNULL reader); /// Read `utf8` SERD_API @@ -1683,8 +1683,9 @@ read_nquadsDoc(SerdReader* reader) } if (peek_byte(reader) == '@') { - return r_err( + r_err( reader, SERD_ERR_BAD_SYNTAX, "syntax does not support directives\n"); + return SERD_ERR_BAD_SYNTAX; } // subject predicate object diff --git a/src/reader.c b/src/reader.c index 6e814d93..194c295b 100644 --- a/src/reader.c +++ b/src/reader.c @@ -23,11 +23,13 @@ #include <errno.h> #include <stdarg.h> -#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +static SerdStatus +serd_reader_prepare(SerdReader* reader); + SerdStatus r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...) { @@ -65,22 +67,6 @@ blank_id(SerdReader* reader) return ref; } -/** fread-like wrapper for getc (which is faster). */ -static size_t -serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) -{ - (void)size; - (void)nmemb; - - const int c = getc((FILE*)stream); - if (c == EOF) { - *((uint8_t*)buf) = 0; - return 0; - } - *((uint8_t*)buf) = (uint8_t)c; - return 1; -} - Ref push_node_padded(SerdReader* reader, size_t maxlen, @@ -165,9 +151,16 @@ read_statement(SerdReader* reader) return read_n3_statement(reader); } -static SerdStatus -read_doc(SerdReader* reader) +SerdStatus +serd_reader_read_document(SerdReader* reader) { + if (!reader->source.prepared) { + SerdStatus st = serd_reader_prepare(reader); + if (st) { + return st; + } + } + return ((reader->syntax == SERD_NQUADS) ? read_nquadsDoc(reader) : read_turtleTrigDoc(reader)); } @@ -281,10 +274,23 @@ serd_reader_read_file(SerdReader* reader, const char* uri) return SERD_ERR_UNKNOWN; } - SerdStatus ret = serd_reader_read_file_handle(reader, fd, path); + SerdStatus st = serd_reader_start_stream(reader, + (SerdSource)fread, + (SerdStreamErrorFunc)ferror, + fd, + path, + SERD_PAGE_SIZE); + + if (!st) { + st = serd_reader_read_document(reader); + } + + const SerdStatus est = serd_reader_end_stream(reader); + fclose(fd); free(path); - return ret; + + return st ? st : est; } static SerdStatus @@ -305,30 +311,21 @@ skip_bom(SerdReader* me) } SerdStatus -serd_reader_start_stream(SerdReader* reader, - FILE* file, - const char* name, - bool bulk) +serd_reader_start_stream(SerdReader* reader, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + const char* name, + size_t page_size) { - return serd_reader_start_source_stream(reader, - bulk ? (SerdSource)fread - : serd_file_read_byte, - (SerdStreamErrorFunc)ferror, - file, - name, - bulk ? SERD_PAGE_SIZE : 1); + return serd_byte_source_open_source( + &reader->source, read_func, error_func, stream, name, page_size); } SerdStatus -serd_reader_start_source_stream(SerdReader* reader, - SerdSource read_func, - SerdStreamErrorFunc error_func, - void* stream, - const char* name, - size_t page_size) +serd_reader_start_string(SerdReader* reader, const char* utf8) { - return serd_byte_source_open_source( - &reader->source, read_func, error_func, stream, name, page_size); + return serd_byte_source_open_string(&reader->source, utf8); } static SerdStatus @@ -370,51 +367,12 @@ serd_reader_end_stream(SerdReader* reader) } SerdStatus -serd_reader_read_file_handle(SerdReader* reader, FILE* file, const char* name) -{ - return serd_reader_read_source(reader, - (SerdSource)fread, - (SerdStreamErrorFunc)ferror, - file, - name, - SERD_PAGE_SIZE); -} - -SerdStatus -serd_reader_read_source(SerdReader* reader, - SerdSource source, - SerdStreamErrorFunc error, - void* stream, - const char* name, - size_t page_size) -{ - SerdStatus st = serd_reader_start_source_stream( - reader, source, error, stream, name, page_size); - - if (st || (st = serd_reader_prepare(reader))) { - serd_reader_end_stream(reader); - return st; - } - - if ((st = read_doc(reader))) { - serd_reader_end_stream(reader); - return st; - } - - return serd_reader_end_stream(reader); -} - -SerdStatus serd_reader_read_string(SerdReader* reader, const char* utf8) { - serd_byte_source_open_string(&reader->source, utf8); + serd_reader_start_string(reader, utf8); - SerdStatus st = serd_reader_prepare(reader); - if (!st) { - st = read_doc(reader); - } + const SerdStatus st = serd_reader_read_document(reader); + const SerdStatus est = serd_byte_source_close(&reader->source); - serd_byte_source_close(&reader->source); - - return st; + return st ? st : est; } diff --git a/src/serdi.c b/src/serdi.c index f0cf8f4b..0e59c5ee 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -35,6 +35,7 @@ #include <errno.h> #include <stdbool.h> +#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -150,6 +151,22 @@ serd_fopen(const char* path, const char* mode) return fd; } +/** fread-like wrapper for getc (which is faster). */ +static size_t +serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) +{ + (void)size; + (void)nmemb; + + const int c = getc((FILE*)stream); + if (c == EOF) { + *((uint8_t*)buf) = 0; + return 0; + } + *((uint8_t*)buf) = (uint8_t)c; + return 1; +} + static SerdWriterFlags choose_style(const SerdSyntax input_syntax, const SerdSyntax output_syntax, @@ -345,17 +362,23 @@ main(int argc, char** argv) SerdStatus st = SERD_SUCCESS; if (!from_file) { - st = serd_reader_read_string(reader, input); - } else if (bulk_read) { - st = serd_reader_read_file_handle(reader, in_fd, in_name); + st = serd_reader_start_string(reader, input); } else { - st = serd_reader_start_stream(reader, in_fd, in_name, false); - while (!st) { - st = serd_reader_read_chunk(reader); - } - serd_reader_end_stream(reader); + st = serd_reader_start_stream(reader, + bulk_read ? (SerdSource)fread + : serd_file_read_byte, + (SerdStreamErrorFunc)ferror, + in_fd, + in_name, + bulk_read ? 4096 : 1); + } + + if (!st) { + st = serd_reader_read_document(reader); } + serd_reader_end_stream(reader); + serd_reader_free(reader); serd_writer_finish(writer); serd_writer_free(writer); diff --git a/test/test_read_chunk.c b/test/test_read_chunk.c index dd2961ac..76e9b642 100644 --- a/test/test_read_chunk.c +++ b/test/test_read_chunk.c @@ -19,8 +19,7 @@ #include "serd/serd.h" #include <assert.h> -#include <stdbool.h> -#include <stdio.h> +#include <stddef.h> static size_t n_base = 0; static size_t n_prefix = 0; @@ -80,39 +79,32 @@ on_end(void* handle, const SerdNode* node) int main(void) { - FILE* file = tmpfile(); - - fprintf(file, - "@prefix eg: <http://example.org/> .\n" - "@base <http://example.org/base> .\n" - "eg:s1 eg:p1 eg:o1 ;\n" - " eg:p2 eg:o2 ,\n" - " eg:o3 .\n" - "eg:s2 eg:p1 eg:o1 ;\n" - " eg:p2 eg:o2 .\n" - "eg:s3 eg:p1 eg:o1 .\n" - "eg:s4 eg:p1 [ eg:p3 eg:o1 ] .\n"); - - fseek(file, 0, SEEK_SET); - SerdReader* reader = serd_reader_new( SERD_TURTLE, NULL, NULL, on_base, on_prefix, on_statement, on_end); - assert(reader); - assert(!serd_reader_start_stream(reader, file, NULL, true)); + + assert(!serd_reader_start_string(reader, + "@prefix eg: <http://example.org/> .\n" + "@base <http://example.org/base> .\n" + "eg:s1 eg:p1 eg:o1 ;\n" + " eg:p2 eg:o2 ,\n" + " eg:o3 .\n" + "eg:s2 eg:p1 eg:o1 ;\n" + " eg:p2 eg:o2 .\n" + "eg:s3 eg:p1 eg:o1 .\n" + "eg:s4 eg:p1 [ eg:p3 eg:o1 ] .\n")); assert(!serd_reader_read_chunk(reader) && n_prefix == 1); assert(!serd_reader_read_chunk(reader) && n_base == 1); assert(!serd_reader_read_chunk(reader) && n_statement == 3); assert(!serd_reader_read_chunk(reader) && n_statement == 5); assert(!serd_reader_read_chunk(reader) && n_statement == 6); - assert(!serd_reader_read_chunk(reader) && n_statement == 8 && n_end == 1); + assert(!serd_reader_read_chunk(reader) && n_statement == 8); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + assert(n_end == 1); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); - assert(!serd_reader_end_stream(reader)); - serd_reader_free(reader); - fclose(file); + serd_reader_free(reader); return 0; } diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c index 5431462a..48bb508e 100644 --- a/test/test_reader_writer.c +++ b/test/test_reader_writer.c @@ -19,7 +19,6 @@ #include "serd/serd.h" #include <assert.h> -#include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -89,7 +88,8 @@ test_read_chunks(void) assert(serd_reader_handle(reader) == rt); assert(f); - SerdStatus st = serd_reader_start_stream(reader, f, NULL, false); + SerdStatus st = serd_reader_start_stream( + reader, (SerdSource)fread, (SerdStreamErrorFunc)ferror, f, NULL, 1); assert(st == SERD_SUCCESS); // Write two statement separated by null characters @@ -292,7 +292,8 @@ test_reader(const char* path) fflush(temp); fseek(temp, 0L, SEEK_SET); - serd_reader_start_stream(reader, temp, NULL, true); + serd_reader_start_stream( + reader, (SerdSource)fread, (SerdStreamErrorFunc)ferror, temp, NULL, 4096); assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); @@ -305,12 +306,12 @@ test_reader(const char* path) // A byte-wise reader that hits EOF once then continues (like a socket) { size_t n_reads = 0; - serd_reader_start_source_stream(reader, - (SerdSource)eof_test_read, - (SerdStreamErrorFunc)eof_test_error, - &n_reads, - NULL, - 1); + serd_reader_start_stream(reader, + (SerdSource)eof_test_read, + (SerdStreamErrorFunc)eof_test_error, + &n_reads, + NULL, + 1); assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); |