diff options
-rw-r--r-- | serd/serd.h | 54 | ||||
-rw-r--r-- | src/n3.c | 28 | ||||
-rw-r--r-- | src/reader.c | 116 | ||||
-rw-r--r-- | src/serd_internal.h | 4 | ||||
-rw-r--r-- | src/serdi.c | 33 |
5 files changed, 99 insertions, 136 deletions
diff --git a/serd/serd.h b/serd/serd.h index c8aabb8d..3e2bc527 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -838,34 +838,26 @@ serd_reader_read_file(SerdReader* reader, const char* uri); /** - Start an incremental read from a file handle. + Prepare to read from a stream. - Iff `bulk` is true, `file` will be read a page at a time. This is more - efficient, but uses a page of memory and means that an entire page of input - must be ready before any callbacks will fire. To react as soon as input - arrives, set `bulk` to false. + The `read_func` is guaranteed to only be called for `page_size` elements + with size 1 (i.e. `page_size` bytes). */ SERD_API SerdStatus -serd_reader_start_stream(SerdReader* reader, - FILE* file, - const char* name, - bool bulk); +serd_reader_start_stream(SerdReader* reader, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + const char* name, + size_t page_size); /** - Start an incremental read from a user-specified source. - - The `read_func` is guaranteed to only be called for `page_size` elements - with size 1 (i.e. `page_size` bytes). + Prepare to read from a string. */ SERD_API SerdStatus -serd_reader_start_source_stream(SerdReader* reader, - SerdSource read_func, - SerdStreamErrorFunc error_func, - void* stream, - const char* name, - size_t page_size); +serd_reader_start_string(SerdReader* reader, const char* utf8); /** Read a single "chunk" of data during an incremental read. @@ -880,32 +872,22 @@ SerdStatus serd_reader_read_chunk(SerdReader* reader); /** - Finish an incremental read from a file handle. -*/ -SERD_API -SerdStatus -serd_reader_end_stream(SerdReader* reader); + Read a complete document from the source. -/** - Read `file`. + This function will continue pulling from the source until a complete + document has been read. Note that this may block when used with streams, + for incremental reading use serd_reader_read_chunk(). */ SERD_API SerdStatus -serd_reader_read_file_handle(SerdReader* reader, - FILE* file, - const char* name); +serd_reader_read_document(SerdReader* reader); /** - Read a user-specified byte source. + Finish reading from the source. */ SERD_API SerdStatus -serd_reader_read_source(SerdReader* reader, - SerdSource source, - SerdStreamErrorFunc error, - void* stream, - const char* name, - size_t page_size); +serd_reader_end_stream(SerdReader* reader); /** Read `utf8`. @@ -1430,22 +1430,22 @@ skip_until(SerdReader* reader, uint8_t byte) } } -bool +SerdStatus read_turtleTrigDoc(SerdReader* reader) { while (!reader->source.eof) { if (!read_n3_statement(reader)) { if (reader->strict) { - return 0; + return SERD_ERR_UNKNOWN; } skip_until(reader, '\n'); reader->status = SERD_SUCCESS; } } - return reader->status <= SERD_FAILURE; + return reader->status; } -bool +SerdStatus read_nquadsDoc(SerdReader* reader) { while (!reader->source.eof) { @@ -1458,8 +1458,9 @@ read_nquadsDoc(SerdReader* reader) reader->source.eof = true; break; } else if (peek_byte(reader) == '@') { - return r_err(reader, SERD_ERR_BAD_SYNTAX, - "syntax does not support directives\n"); + r_err(reader, SERD_ERR_BAD_SYNTAX, + "syntax does not support directives\n"); + return SERD_ERR_BAD_SYNTAX; } // subject predicate object @@ -1468,11 +1469,11 @@ read_nquadsDoc(SerdReader* reader) !(ctx.predicate = read_IRIREF(reader)) || !read_ws_star(reader) || !read_object(reader, &ctx, false, &ate_dot)) { - return false; + return SERD_ERR_UNKNOWN; } if (!ate_dot) { // graphLabel? - TRY_RET(read_ws_star(reader)); + read_ws_star(reader); switch (peek_byte(reader)) { case '.': break; @@ -1481,20 +1482,23 @@ read_nquadsDoc(SerdReader* reader) break; default: if (!(ctx.graph = read_IRIREF(reader))) { - return false; + return SERD_ERR_UNKNOWN; } } // Terminating '.' - TRY_RET(read_ws_star(reader)); + read_ws_star(reader); eat_byte_check(reader, '.'); } - TRY_RET(emit_statement(reader, ctx, ctx.object, ctx.datatype, ctx.lang)); + if (!emit_statement(reader, ctx, ctx.object, ctx.datatype, ctx.lang)) { + break; + } + pop_node(reader, ctx.graph); pop_node(reader, ctx.lang); pop_node(reader, ctx.datatype); pop_node(reader, ctx.object); } - return reader->status <= SERD_FAILURE; + return reader->status; } diff --git a/src/reader.c b/src/reader.c index cedf84cf..8c43f80a 100644 --- a/src/reader.c +++ b/src/reader.c @@ -24,6 +24,8 @@ #include <stdlib.h> #include <string.h> +static SerdStatus serd_reader_prepare(SerdReader* reader); + int r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...) { @@ -59,19 +61,6 @@ blank_id(SerdReader* reader) return ref; } -/** fread-like wrapper for getc (which is faster). */ -static size_t -serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) -{ - const int c = getc((FILE*)stream); - if (c == EOF) { - *((uint8_t*)buf) = 0; - return 0; - } - *((uint8_t*)buf) = (uint8_t)c; - return 1; -} - Ref push_node_padded(SerdReader* reader, size_t maxlen, SerdType type, const char* str, size_t n_bytes) @@ -147,9 +136,17 @@ read_statement(SerdReader* reader) } } -static bool -read_doc(SerdReader* reader) +SERD_API +SerdStatus +serd_reader_read_document(SerdReader* reader) { + if (!reader->source.prepared) { + SerdStatus st = serd_reader_prepare(reader); + if (st) { + return st; + } + } + switch (reader->syntax) { case SERD_NQUADS: return read_nquadsDoc(reader); default: return read_turtleTrigDoc(reader); @@ -270,10 +267,20 @@ serd_reader_read_file(SerdReader* reader, return SERD_ERR_UNKNOWN; } - SerdStatus ret = serd_reader_read_file_handle(reader, fd, path); + SerdStatus st = serd_reader_start_stream( + reader, (SerdSource)fread, (SerdStreamErrorFunc)ferror, + fd, path, SERD_PAGE_SIZE); + + if (!st) { + st = serd_reader_read_document(reader); + } + + const SerdStatus est = serd_reader_end_stream(reader); + fclose(fd); free(path); - return ret; + + return st ? st : est; } static SerdStatus @@ -295,31 +302,22 @@ skip_bom(SerdReader* me) SERD_API SerdStatus -serd_reader_start_stream(SerdReader* reader, - FILE* file, - const char* name, - bool bulk) +serd_reader_start_stream(SerdReader* reader, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + const char* name, + size_t page_size) { - return serd_reader_start_source_stream( - reader, - bulk ? (SerdSource)fread : serd_file_read_byte, - (SerdStreamErrorFunc)ferror, - file, - name, - bulk ? SERD_PAGE_SIZE : 1); + return serd_byte_source_open_source( + &reader->source, read_func, error_func, stream, name, page_size); } SERD_API SerdStatus -serd_reader_start_source_stream(SerdReader* reader, - SerdSource read_func, - SerdStreamErrorFunc error_func, - void* stream, - const char* name, - size_t page_size) +serd_reader_start_string(SerdReader* reader, const char* utf8) { - return serd_byte_source_open_source( - &reader->source, read_func, error_func, stream, name, page_size); + return serd_byte_source_open_string(&reader->source, utf8); } static SerdStatus @@ -363,50 +361,12 @@ serd_reader_end_stream(SerdReader* reader) SERD_API SerdStatus -serd_reader_read_file_handle(SerdReader* reader, - FILE* file, - const char* name) -{ - return serd_reader_read_source( - reader, (SerdSource)fread, (SerdStreamErrorFunc)ferror, - file, name, SERD_PAGE_SIZE); -} - -SERD_API -SerdStatus -serd_reader_read_source(SerdReader* reader, - SerdSource source, - SerdStreamErrorFunc error, - void* stream, - const char* name, - size_t page_size) -{ - SerdStatus st = serd_reader_start_source_stream( - reader, source, error, stream, name, page_size); - - if (st || (st = serd_reader_prepare(reader))) { - serd_reader_end_stream(reader); - return st; - } else if (!read_doc(reader)) { - serd_reader_end_stream(reader); - return SERD_ERR_UNKNOWN; - } - - return serd_reader_end_stream(reader); -} - -SERD_API -SerdStatus serd_reader_read_string(SerdReader* reader, const char* utf8) { - serd_byte_source_open_string(&reader->source, utf8); - - SerdStatus st = serd_reader_prepare(reader); - if (!st) { - st = read_doc(reader) ? SERD_SUCCESS : SERD_ERR_UNKNOWN; - } + serd_reader_start_string(reader, utf8); - serd_byte_source_close(&reader->source); + const SerdStatus st = serd_reader_read_document(reader); + const SerdStatus est = serd_byte_source_close(&reader->source); - return st; + return st ? st : est; } diff --git a/src/serd_internal.h b/src/serd_internal.h index 5be89a12..6e0d7a8c 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -569,8 +569,8 @@ Ref pop_node(SerdReader* reader, Ref ref); bool emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l); bool read_n3_statement(SerdReader* reader); -bool read_nquadsDoc(SerdReader* reader); -bool read_turtleTrigDoc(SerdReader* reader); +SerdStatus read_nquadsDoc(SerdReader* reader); +SerdStatus read_turtleTrigDoc(SerdReader* reader); typedef enum { FIELD_NONE, diff --git a/src/serdi.c b/src/serdi.c index ec66cfd3..caa1d9af 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -113,6 +113,19 @@ quiet_error_sink(void* handle, const SerdError* e) return SERD_SUCCESS; } +/** fread-like wrapper for getc (which is faster). */ +static size_t +serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) +{ + const int c = getc((FILE*)stream); + if (c == EOF) { + *((uint8_t*)buf) = 0; + return 0; + } + *((uint8_t*)buf) = (uint8_t)c; + return 1; +} + int main(int argc, char** argv) { @@ -282,17 +295,21 @@ main(int argc, char** argv) SerdStatus status = SERD_SUCCESS; if (!from_file) { - status = serd_reader_read_string(reader, input); - } else if (bulk_read) { - status = serd_reader_read_file_handle(reader, in_fd, in_name); + status = serd_reader_start_string(reader, input); } else { - status = serd_reader_start_stream(reader, in_fd, in_name, false); - while (!status) { - status = serd_reader_read_chunk(reader); - } - serd_reader_end_stream(reader); + status = serd_reader_start_stream( + reader, + bulk_read ? (SerdSource)fread : serd_file_read_byte, + (SerdStreamErrorFunc)ferror, + in_fd, + in_name, + bulk_read ? SERD_PAGE_SIZE : 1); } + status = serd_reader_read_document(reader); + + serd_reader_end_stream(reader); + serd_reader_free(reader); serd_writer_finish(writer); serd_writer_free(writer); |