diff options
-rw-r--r-- | serd/serd.h | 55 | ||||
-rw-r--r-- | src/n3.c | 5 | ||||
-rw-r--r-- | src/reader.c | 117 | ||||
-rw-r--r-- | src/serdi.c | 39 | ||||
-rw-r--r-- | tests/read_chunk_test.c | 33 | ||||
-rw-r--r-- | tests/serd_test.c | 26 |
6 files changed, 120 insertions, 155 deletions
diff --git a/serd/serd.h b/serd/serd.h index 4f0d8a6b..1333b61a 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -25,7 +25,6 @@ #include <stdbool.h> #include <stddef.h> #include <stdint.h> -#include <stdio.h> #ifdef SERD_SHARED # ifdef _WIN32 @@ -847,34 +846,26 @@ serd_reader_read_file(SerdReader* reader, const char* uri); /** - Start an incremental read from a file handle. + Prepare to read from a stream. - Iff `bulk` is true, `file` will be read a page at a time. This is more - efficient, but uses a page of memory and means that an entire page of input - must be ready before any callbacks will fire. To react as soon as input - arrives, set `bulk` to false. + The `read_func` is guaranteed to only be called for `page_size` elements + with size 1 (i.e. `page_size` bytes). */ SERD_API SerdStatus -serd_reader_start_stream(SerdReader* reader, - FILE* file, - const char* name, - bool bulk); +serd_reader_start_stream(SerdReader* reader, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + const char* name, + size_t page_size); /** - Start an incremental read from a user-specified source. - - The `read_func` is guaranteed to only be called for `page_size` elements - with size 1 (i.e. `page_size` bytes). + Prepare to read from a string. */ SERD_API SerdStatus -serd_reader_start_source_stream(SerdReader* reader, - SerdSource read_func, - SerdStreamErrorFunc error_func, - void* stream, - const char* name, - size_t page_size); +serd_reader_start_string(SerdReader* reader, const char* utf8); /** Read a single "chunk" of data during an incremental read. @@ -889,32 +880,22 @@ SerdStatus serd_reader_read_chunk(SerdReader* reader); /** - Finish an incremental read from a file handle. -*/ -SERD_API -SerdStatus -serd_reader_end_stream(SerdReader* reader); + Read a complete document from the source. -/** - Read `file`. + This function will continue pulling from the source until a complete + document has been read. Note that this may block when used with streams, + for incremental reading use serd_reader_read_chunk(). */ SERD_API SerdStatus -serd_reader_read_file_handle(SerdReader* reader, - FILE* file, - const char* name); +serd_reader_read_document(SerdReader* reader); /** - Read a user-specified byte source. + Finish reading from the source. */ SERD_API SerdStatus -serd_reader_read_source(SerdReader* reader, - SerdSource source, - SerdStreamErrorFunc error, - void* stream, - const char* name, - size_t page_size); +serd_reader_end_stream(SerdReader* reader); /** Read `utf8`. @@ -1583,8 +1583,9 @@ read_nquadsDoc(SerdReader* reader) if (peek_byte(reader) == EOF) { break; } else if (peek_byte(reader) == '@') { - return r_err(reader, SERD_ERR_BAD_SYNTAX, - "syntax does not support directives\n"); + r_err(reader, SERD_ERR_BAD_SYNTAX, + "syntax does not support directives\n"); + return SERD_ERR_BAD_SYNTAX; } // subject predicate object diff --git a/src/reader.c b/src/reader.c index eef4d95b..3784f297 100644 --- a/src/reader.c +++ b/src/reader.c @@ -21,11 +21,12 @@ #include <errno.h> #include <stdarg.h> -#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +static SerdStatus serd_reader_prepare(SerdReader* reader); + SerdStatus r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...) { @@ -63,22 +64,6 @@ blank_id(SerdReader* reader) return ref; } -/** fread-like wrapper for getc (which is faster). */ -static size_t -serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) -{ - (void)size; - (void)nmemb; - - const int c = getc((FILE*)stream); - if (c == EOF) { - *((uint8_t*)buf) = 0; - return 0; - } - *((uint8_t*)buf) = (uint8_t)c; - return 1; -} - Ref push_node_padded(SerdReader* reader, size_t maxlen, SerdType type, const char* str, size_t n_bytes) @@ -160,9 +145,16 @@ read_statement(SerdReader* reader) return read_n3_statement(reader); } -static SerdStatus -read_doc(SerdReader* reader) +SerdStatus +serd_reader_read_document(SerdReader* reader) { + if (!reader->source.prepared) { + SerdStatus st = serd_reader_prepare(reader); + if (st) { + return st; + } + } + return ((reader->syntax == SERD_NQUADS) ? read_nquadsDoc(reader) : read_turtleTrigDoc(reader)); } @@ -277,10 +269,20 @@ serd_reader_read_file(SerdReader* reader, return SERD_ERR_UNKNOWN; } - SerdStatus ret = serd_reader_read_file_handle(reader, fd, path); + SerdStatus st = serd_reader_start_stream( + reader, (SerdSource)fread, (SerdStreamErrorFunc)ferror, + fd, path, SERD_PAGE_SIZE); + + if (!st) { + st = serd_reader_read_document(reader); + } + + const SerdStatus est = serd_reader_end_stream(reader); + fclose(fd); free(path); - return ret; + + return st ? st : est; } static SerdStatus @@ -301,30 +303,21 @@ skip_bom(SerdReader* me) } SerdStatus -serd_reader_start_stream(SerdReader* reader, - FILE* file, - const char* name, - bool bulk) +serd_reader_start_stream(SerdReader* reader, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + const char* name, + size_t page_size) { - return serd_reader_start_source_stream( - reader, - bulk ? (SerdSource)fread : serd_file_read_byte, - (SerdStreamErrorFunc)ferror, - file, - name, - bulk ? SERD_PAGE_SIZE : 1); + return serd_byte_source_open_source( + &reader->source, read_func, error_func, stream, name, page_size); } SerdStatus -serd_reader_start_source_stream(SerdReader* reader, - SerdSource read_func, - SerdStreamErrorFunc error_func, - void* stream, - const char* name, - size_t page_size) +serd_reader_start_string(SerdReader* reader, const char* utf8) { - return serd_byte_source_open_source( - &reader->source, read_func, error_func, stream, name, page_size); + return serd_byte_source_open_string(&reader->source, utf8); } static SerdStatus @@ -366,48 +359,12 @@ serd_reader_end_stream(SerdReader* reader) } SerdStatus -serd_reader_read_file_handle(SerdReader* reader, - FILE* file, - const char* name) -{ - return serd_reader_read_source( - reader, (SerdSource)fread, (SerdStreamErrorFunc)ferror, - file, name, SERD_PAGE_SIZE); -} - -SerdStatus -serd_reader_read_source(SerdReader* reader, - SerdSource source, - SerdStreamErrorFunc error, - void* stream, - const char* name, - size_t page_size) -{ - SerdStatus st = serd_reader_start_source_stream( - reader, source, error, stream, name, page_size); - - if (st || (st = serd_reader_prepare(reader))) { - serd_reader_end_stream(reader); - return st; - } else if ((st = read_doc(reader))) { - serd_reader_end_stream(reader); - return st; - } - - return serd_reader_end_stream(reader); -} - -SerdStatus serd_reader_read_string(SerdReader* reader, const char* utf8) { - serd_byte_source_open_string(&reader->source, utf8); + serd_reader_start_string(reader, utf8); - SerdStatus st = serd_reader_prepare(reader); - if (!st) { - st = read_doc(reader); - } + const SerdStatus st = serd_reader_read_document(reader); + const SerdStatus est = serd_byte_source_close(&reader->source); - serd_byte_source_close(&reader->source); - - return st; + return st ? st : est; } diff --git a/src/serdi.c b/src/serdi.c index 47118e8f..99be638c 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -32,6 +32,7 @@ #include <errno.h> #include <stdbool.h> +#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -146,6 +147,22 @@ serd_fopen(const char* path, const char* mode) return fd; } +/** fread-like wrapper for getc (which is faster). */ +static size_t +serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) +{ + (void)size; + (void)nmemb; + + const int c = getc((FILE*)stream); + if (c == EOF) { + *((uint8_t*)buf) = 0; + return 0; + } + *((uint8_t*)buf) = (uint8_t)c; + return 1; +} + int main(int argc, char** argv) { @@ -320,17 +337,23 @@ main(int argc, char** argv) SerdStatus st = SERD_SUCCESS; if (!from_file) { - st = serd_reader_read_string(reader, input); - } else if (bulk_read) { - st = serd_reader_read_file_handle(reader, in_fd, in_name); + st = serd_reader_start_string(reader, input); } else { - st = serd_reader_start_stream(reader, in_fd, in_name, false); - while (!st) { - st = serd_reader_read_chunk(reader); - } - serd_reader_end_stream(reader); + st = serd_reader_start_stream( + reader, + bulk_read ? (SerdSource)fread : serd_file_read_byte, + (SerdStreamErrorFunc)ferror, + in_fd, + in_name, + bulk_read ? 4096 : 1); + } + + if (!st) { + st = serd_reader_read_document(reader); } + serd_reader_end_stream(reader); + serd_reader_free(reader); serd_writer_finish(writer); serd_writer_free(writer); diff --git a/tests/read_chunk_test.c b/tests/read_chunk_test.c index 26165667..93291e01 100644 --- a/tests/read_chunk_test.c +++ b/tests/read_chunk_test.c @@ -19,8 +19,7 @@ #include "serd/serd.h" #include <assert.h> -#include <stdbool.h> -#include <stdio.h> +#include <stddef.h> static size_t n_base = 0; static size_t n_prefix = 0; @@ -84,37 +83,31 @@ on_end(void* handle, const SerdNode* node) int main(void) { - FILE* file = tmpfile(); - - fprintf(file, - "@prefix eg: <http://example.org/> .\n" - "@base <http://example.org/base> .\n" - "eg:s1 eg:p1 eg:o1 ;\n" - " eg:p2 eg:o2 ,\n" - " eg:o3 .\n" - "eg:s2 eg:p1 eg:o1 ;\n" - " eg:p2 eg:o2 .\n" - "eg:s3 eg:p1 eg:o1 .\n" - "eg:s4 eg:p1 [ eg:p3 eg:o1 ] .\n"); - - fseek(file, 0, SEEK_SET); - SerdReader* reader = serd_reader_new( SERD_TURTLE, NULL, NULL, on_base, on_prefix, on_statement, on_end); assert(reader); - assert(!serd_reader_start_stream(reader, file, NULL, true)); + assert(!serd_reader_start_string(reader, + "@prefix eg: <http://example.org/> .\n" + "@base <http://example.org/base> .\n" + "eg:s1 eg:p1 eg:o1 ;\n" + " eg:p2 eg:o2 ,\n" + " eg:o3 .\n" + "eg:s2 eg:p1 eg:o1 ;\n" + " eg:p2 eg:o2 .\n" + "eg:s3 eg:p1 eg:o1 .\n" + "eg:s4 eg:p1 [ eg:p3 eg:o1 ] .\n")); assert(!serd_reader_read_chunk(reader) && n_prefix == 1); assert(!serd_reader_read_chunk(reader) && n_base == 1); assert(!serd_reader_read_chunk(reader) && n_statement == 3); assert(!serd_reader_read_chunk(reader) && n_statement == 5); assert(!serd_reader_read_chunk(reader) && n_statement == 6); - assert(!serd_reader_read_chunk(reader) && n_statement == 8 && n_end == 1); + assert(!serd_reader_read_chunk(reader) && n_statement == 8); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + assert(n_end == 1); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); serd_reader_free(reader); - fclose(file); return 0; } diff --git a/tests/serd_test.c b/tests/serd_test.c index f0541593..ff1d2467 100644 --- a/tests/serd_test.c +++ b/tests/serd_test.c @@ -140,7 +140,12 @@ test_read_chunks(void) assert(serd_reader_handle(reader) == rt); assert(f); - SerdStatus st = serd_reader_start_stream(reader, f, NULL, false); + SerdStatus st = serd_reader_start_stream(reader, + (SerdSource)fread, + (SerdStreamErrorFunc)ferror, + f, + NULL, + 1); assert(st == SERD_SUCCESS); // Write two statement separated by null characters @@ -604,7 +609,12 @@ test_reader(const char* path) fflush(temp); fseek(temp, 0L, SEEK_SET); - serd_reader_start_stream(reader, temp, NULL, true); + serd_reader_start_stream(reader, + (SerdSource)fread, + (SerdStreamErrorFunc)ferror, + temp, + NULL, + 4096); assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); @@ -617,12 +627,12 @@ test_reader(const char* path) // A byte-wise reader that hits EOF once then continues (like a socket) { size_t n_reads = 0; - serd_reader_start_source_stream(reader, - (SerdSource)eof_test_read, - (SerdStreamErrorFunc)eof_test_error, - &n_reads, - NULL, - 1); + serd_reader_start_stream(reader, + (SerdSource)eof_test_read, + (SerdStreamErrorFunc)eof_test_error, + &n_reads, + NULL, + 1); assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); assert(serd_reader_read_chunk(reader) == SERD_FAILURE); |