From 4de86fdfb643dbe7113e261998c5e159f940b7d4 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Tue, 8 May 2012 23:51:51 +0000 Subject: Add incremental read interface suitable for reading from infinite streams. git-svn-id: http://svn.drobilla.net/serd/trunk@350 490d8e77-9747-427b-9fa3-0b8f29cee8a0 --- src/reader.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++---------- src/serdi.c | 19 ++++++++++++--- 2 files changed, 82 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/reader.c b/src/reader.c index 70dc7fe5..5c623039 100644 --- a/src/reader.c +++ b/src/reader.c @@ -78,7 +78,9 @@ struct SerdReaderImpl { unsigned next_id; uint8_t* read_buf; int32_t read_head; ///< Offset into read_buf + uint8_t read_byte; ///< 1-byte 'buffer' used when not paging bool from_file; ///< True iff reading from @ref fd + bool paging; ///< True iff reading a page at a time bool eof; bool seen_genid; #ifdef SERD_STACK_CHECK @@ -124,14 +126,19 @@ static inline uint8_t eat_byte_safe(SerdReader* reader, const uint8_t byte) { assert(peek_byte(reader) == byte); - ++reader->read_head; switch (byte) { case '\0': reader->eof = true; break; case '\n': ++reader->cur.line; reader->cur.col = 0; break; default: ++reader->cur.col; } - if (reader->from_file && (reader->read_head == SERD_PAGE_SIZE)) { + if (reader->from_file && !reader->paging) { + const int c = fgetc(reader->fd); + reader->read_byte = (c == EOF) ? 0 : (uint8_t)c; + if (c == EOF) { + reader->eof = true; + } + } else if (++reader->read_head == SERD_PAGE_SIZE && reader->paging) { page(reader); } return byte; @@ -548,7 +555,7 @@ read_comment(SerdReader* reader) { eat_byte_safe(reader, '#'); uint8_t c; - while (((c = peek_byte(reader)) != 0xA) && (c != 0xD)) { + while (((c = peek_byte(reader)) != 0xA) && (c != 0xD) && c) { eat_byte_safe(reader, c); } } @@ -1458,14 +1465,17 @@ static void skip_bom(SerdReader* me) { const uint8_t* const b = me->read_buf; - if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) { + if (me->paging && b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) { me->read_head += 3; } } SERD_API SerdStatus -serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name) +serd_reader_start_stream(SerdReader* me, + FILE* file, + const uint8_t* name, + bool bulk) { const Cursor cur = { name, 1, 1 }; me->fd = file; @@ -1473,19 +1483,62 @@ serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name) me->cur = cur; me->from_file = true; me->eof = false; - me->read_buf = (uint8_t*)serd_bufalloc(SERD_PAGE_SIZE); + me->paging = bulk; + + if (bulk) { + me->read_buf = (uint8_t*)serd_bufalloc(SERD_PAGE_SIZE); + memset(me->read_buf, '\0', SERD_PAGE_SIZE); + SerdStatus st = page(me); + if (st) { + serd_reader_end_stream(me); + return st; + } + skip_bom(me); + } else { + me->read_buf = &me->read_byte; + me->read_byte = 0; // Don't read to avoid potentially blocking + } - memset(me->read_buf, '\0', SERD_PAGE_SIZE); + return SERD_SUCCESS; +} - SerdStatus st = page(me); - if (!st) { - skip_bom(me); - st = read_turtleDoc(me) ? SERD_SUCCESS : SERD_ERR_UNKNOWN; +SERD_API +SerdStatus +serd_reader_read_chunk(SerdReader* me) +{ + if (!me->read_byte) { + // Read initial byte + const int c = fgetc(me->fd); + me->read_byte = (c == EOF) ? 0 : (uint8_t)c; + if (c == EOF) { + me->eof = true; + return SERD_FAILURE; + } } + return read_statement(me) ? SERD_SUCCESS : SERD_FAILURE; +} - free(me->read_buf); +SERD_API +SerdStatus +serd_reader_end_stream(SerdReader* me) +{ + if (me->paging) { + free(me->read_buf); + } me->fd = 0; me->read_buf = NULL; + return SERD_SUCCESS; +} + +SERD_API +SerdStatus +serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name) +{ + SerdStatus st = serd_reader_start_stream(me, file, name, true); + if (!st) { + st = read_turtleDoc(me) ? SERD_SUCCESS : SERD_ERR_UNKNOWN; + serd_reader_end_stream(me); + } return st; } @@ -1499,6 +1552,7 @@ serd_reader_read_string(SerdReader* me, const uint8_t* utf8) me->read_head = 0; me->cur = cur; me->from_file = false; + me->paging = false; me->eof = false; skip_bom(me); diff --git a/src/serdi.c b/src/serdi.c index 74a84992..4c482884 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -46,6 +46,7 @@ print_usage(const char* name, bool error) fprintf(os, "Use - for INPUT to read from standard input.\n\n"); fprintf(os, " -b Fast bulk output for large serialisations.\n"); fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); + fprintf(os, " -e Eat input one character at a time.\n"); fprintf(os, " -f Keep full URIs in input (don't qualify).\n"); fprintf(os, " -h Display this help and exit.\n"); fprintf(os, " -i SYNTAX Input syntax (`turtle' or `ntriples').\n"); @@ -89,6 +90,7 @@ main(int argc, char** argv) SerdSyntax input_syntax = SERD_TURTLE; SerdSyntax output_syntax = SERD_NTRIPLES; bool from_file = true; + bool bulk_read = true; bool bulk_write = false; bool full_uris = false; const uint8_t* in_name = NULL; @@ -103,6 +105,8 @@ main(int argc, char** argv) break; } else if (argv[a][1] == 'b') { bulk_write = true; + } else if (argv[a][1] == 'e') { + bulk_read = false; } else if (argv[a][1] == 'f') { full_uris = true; } else if (argv[a][1] == 'h') { @@ -206,9 +210,18 @@ main(int argc, char** argv) serd_writer_chop_blank_prefix(writer, chop_prefix); serd_reader_add_blank_prefix(reader, add_prefix); - const SerdStatus status = (from_file) - ? serd_reader_read_file_handle(reader, in_fd, in_name) - : serd_reader_read_string(reader, input); + SerdStatus status = SERD_SUCCESS; + if (!from_file) { + status = serd_reader_read_string(reader, input); + } else if (bulk_read) { + status = serd_reader_read_file_handle(reader, in_fd, in_name); + } else { + status = serd_reader_start_stream(reader, in_fd, in_name, false); + while (!status) { + status = serd_reader_read_chunk(reader); + } + serd_reader_end_stream(reader); + } serd_reader_free(reader); -- cgit v1.2.1