about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: David Robillard <d@drobilla.net> 2012-05-08 23:51:51 +0000
committer: David Robillard <d@drobilla.net> 2012-05-08 23:51:51 +0000
commit: 4de86fdfb643dbe7113e261998c5e159f940b7d4 (patch)
tree: 0d6b86614f29b68bab7e662b779e474d2654d453
parent: b05e48f333d27e434b82587d248824b98f47bcd5 (diff)
download: serd-4de86fdfb643dbe7113e261998c5e159f940b7d4.tar.gz
download: serd-4de86fdfb643dbe7113e261998c5e159f940b7d4.tar.bz2
download: serd-4de86fdfb643dbe7113e261998c5e159f940b7d4.zip
Add incremental read interface suitable for reading from infinite streams.
git-svn-id: http://svn.drobilla.net/serd/trunk@350 490d8e77-9747-427b-9fa3-0b8f29cee8a0
-rw-r--r-- NEWS 7
-rw-r--r-- doc/serdi.1 10
-rw-r--r-- serd/serd.h 34
-rw-r--r-- src/reader.c 78
-rw-r--r-- src/serdi.c 19
-rw-r--r-- wscript 4
6 files changed, 135 insertions, 17 deletions
diff --git a/NEWS b/NEWS
index a97bfabb..7b147287 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,10 @@
+serd (9999) unstable;
+
+ * Add incremental read interface suitable for reading from infinite streams.
+ * Add -e option to serdi to use incremental reading.
+
+ -- David Robillard <d@drobilla.net>
+
serd (0.14.0) stable;
* Use path variables in pkgconfig files
diff --git a/doc/serdi.1 b/doc/serdi.1
index 23d77ac3..45001c90 100644
--- a/doc/serdi.1
+++ b/doc/serdi.1
@@ -1,4 +1,4 @@
-.TH SERDI 1 "17 Jan 2012"
+.TH SERDI 1 "08 May 2012"
.SH NAME
.B serdi \- Read and write RDF syntax
@@ -17,6 +17,14 @@ Fast bulk output for large serialisations.
Chop PREFIX from matching blank node IDs.
.TP
+\fB\-e\fR
+Eat input one character at a time, rather than a page at a time which is the
+default. This is useful when reading from a pipe since output will be
+generated immediately as input arrives, rather than waiting until an entire
+page of input has arrived. With this option serdi uses one page less memory,
+but will likely be significantly slower.
+
+.TP
\fB\-f\fR
Keep full URIs in input (don't qualify).
diff --git a/serd/serd.h b/serd/serd.h
index 807664dd..cc2365f4 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -705,6 +705,40 @@ serd_reader_read_file(SerdReader* reader,
const uint8_t* uri);
/**
+ Start an incremental read from a file handle.
+
+ Iff @p bulk is true, @p file will be read a page at a time. This is more
+ efficient, but uses a page of memory and means that an entire page of input
+ must be ready before any callbacks will fire. To react as soon as input
+ arrives, set @p bulk to false.
+*/
+SERD_API
+SerdStatus
+serd_reader_start_stream(SerdReader* me,
+ FILE* file,
+ const uint8_t* name,
+ bool bulk);
+
+/**
+ Read a single "chunk" of data during an incremental read.
+
+ This function will read a single top level description, and return. This
+ may be a directive, statement, or several statements; essentially it reads
+ until a '.' is encountered. This is particularly useful for reading
+ directly from a pipe or socket.
+*/
+SERD_API
+SerdStatus
+serd_reader_read_chunk(SerdReader* me);
+
+/**
+ Finish an incremental read from a file handle.
+*/
+SERD_API
+SerdStatus
+serd_reader_end_stream(SerdReader* me);
+
+/**
Read @c file.
*/
SERD_API
diff --git a/src/reader.c b/src/reader.c
index 70dc7fe5..5c623039 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -78,7 +78,9 @@ struct SerdReaderImpl {
unsigned next_id;
uint8_t* read_buf;
int32_t read_head; ///< Offset into read_buf
+ uint8_t read_byte; ///< 1-byte 'buffer' used when not paging
bool from_file; ///< True iff reading from @ref fd
+ bool paging; ///< True iff reading a page at a time
bool eof;
bool seen_genid;
#ifdef SERD_STACK_CHECK
@@ -124,14 +126,19 @@ static inline uint8_t
eat_byte_safe(SerdReader* reader, const uint8_t byte)
{
assert(peek_byte(reader) == byte);
- ++reader->read_head;
switch (byte) {
case '\0': reader->eof = true; break;
case '\n': ++reader->cur.line; reader->cur.col = 0; break;
default: ++reader->cur.col;
}
- if (reader->from_file && (reader->read_head == SERD_PAGE_SIZE)) {
+ if (reader->from_file && !reader->paging) {
+ const int c = fgetc(reader->fd);
+ reader->read_byte = (c == EOF) ? 0 : (uint8_t)c;
+ if (c == EOF) {
+ reader->eof = true;
+ }
+ } else if (++reader->read_head == SERD_PAGE_SIZE && reader->paging) {
page(reader);
}
return byte;
@@ -548,7 +555,7 @@ read_comment(SerdReader* reader)
{
eat_byte_safe(reader, '#');
uint8_t c;
- while (((c = peek_byte(reader)) != 0xA) && (c != 0xD)) {
+ while (((c = peek_byte(reader)) != 0xA) && (c != 0xD) && c) {
eat_byte_safe(reader, c);
}
}
@@ -1458,14 +1465,17 @@ static void
skip_bom(SerdReader* me)
{
const uint8_t* const b = me->read_buf;
- if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) {
+ if (me->paging && b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) {
me->read_head += 3;
}
}
SERD_API
SerdStatus
-serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
+serd_reader_start_stream(SerdReader* me,
+ FILE* file,
+ const uint8_t* name,
+ bool bulk)
{
const Cursor cur = { name, 1, 1 };
me->fd = file;
@@ -1473,19 +1483,62 @@ serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
me->cur = cur;
me->from_file = true;
me->eof = false;
- me->read_buf = (uint8_t*)serd_bufalloc(SERD_PAGE_SIZE);
+ me->paging = bulk;
+
+ if (bulk) {
+ me->read_buf = (uint8_t*)serd_bufalloc(SERD_PAGE_SIZE);
+ memset(me->read_buf, '\0', SERD_PAGE_SIZE);
+ SerdStatus st = page(me);
+ if (st) {
+ serd_reader_end_stream(me);
+ return st;
+ }
+ skip_bom(me);
+ } else {
+ me->read_buf = &me->read_byte;
+ me->read_byte = 0; // Don't read to avoid potentially blocking
+ }
- memset(me->read_buf, '\0', SERD_PAGE_SIZE);
+ return SERD_SUCCESS;
+}
- SerdStatus st = page(me);
- if (!st) {
- skip_bom(me);
- st = read_turtleDoc(me) ? SERD_SUCCESS : SERD_ERR_UNKNOWN;
+SERD_API
+SerdStatus
+serd_reader_read_chunk(SerdReader* me)
+{
+ if (!me->read_byte) {
+ // Read initial byte
+ const int c = fgetc(me->fd);
+ me->read_byte = (c == EOF) ? 0 : (uint8_t)c;
+ if (c == EOF) {
+ me->eof = true;
+ return SERD_FAILURE;
+ }
}
+ return read_statement(me) ? SERD_SUCCESS : SERD_FAILURE;
+}
- free(me->read_buf);
+SERD_API
+SerdStatus
+serd_reader_end_stream(SerdReader* me)
+{
+ if (me->paging) {
+ free(me->read_buf);
+ }
me->fd = 0;
me->read_buf = NULL;
+ return SERD_SUCCESS;
+}
+
+SERD_API
+SerdStatus
+serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
+{
+ SerdStatus st = serd_reader_start_stream(me, file, name, true);
+ if (!st) {
+ st = read_turtleDoc(me) ? SERD_SUCCESS : SERD_ERR_UNKNOWN;
+ serd_reader_end_stream(me);
+ }
return st;
}
@@ -1499,6 +1552,7 @@ serd_reader_read_string(SerdReader* me, const uint8_t* utf8)
me->read_head = 0;
me->cur = cur;
me->from_file = false;
+ me->paging = false;
me->eof = false;
skip_bom(me);
diff --git a/src/serdi.c b/src/serdi.c
index 74a84992..4c482884 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -46,6 +46,7 @@ print_usage(const char* name, bool error)
fprintf(os, "Use - for INPUT to read from standard input.\n\n");
fprintf(os, " -b Fast bulk output for large serialisations.\n");
fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n");
+ fprintf(os, " -e Eat input one character at a time.\n");
fprintf(os, " -f Keep full URIs in input (don't qualify).\n");
fprintf(os, " -h Display this help and exit.\n");
fprintf(os, " -i SYNTAX Input syntax (`turtle' or `ntriples').\n");
@@ -89,6 +90,7 @@ main(int argc, char** argv)
SerdSyntax input_syntax = SERD_TURTLE;
SerdSyntax output_syntax = SERD_NTRIPLES;
bool from_file = true;
+ bool bulk_read = true;
bool bulk_write = false;
bool full_uris = false;
const uint8_t* in_name = NULL;
@@ -103,6 +105,8 @@ main(int argc, char** argv)
break;
} else if (argv[a][1] == 'b') {
bulk_write = true;
+ } else if (argv[a][1] == 'e') {
+ bulk_read = false;
} else if (argv[a][1] == 'f') {
full_uris = true;
} else if (argv[a][1] == 'h') {
@@ -206,9 +210,18 @@ main(int argc, char** argv)
serd_writer_chop_blank_prefix(writer, chop_prefix);
serd_reader_add_blank_prefix(reader, add_prefix);
- const SerdStatus status = (from_file)
- ? serd_reader_read_file_handle(reader, in_fd, in_name)
- : serd_reader_read_string(reader, input);
+ SerdStatus status = SERD_SUCCESS;
+ if (!from_file) {
+ status = serd_reader_read_string(reader, input);
+ } else if (bulk_read) {
+ status = serd_reader_read_file_handle(reader, in_fd, in_name);
+ } else {
+ status = serd_reader_start_stream(reader, in_fd, in_name, false);
+ while (!status) {
+ status = serd_reader_read_chunk(reader);
+ }
+ serd_reader_end_stream(reader);
+ }
serd_reader_free(reader);
diff --git a/wscript b/wscript
index d09612f7..c3db493f 100644
--- a/wscript
+++ b/wscript
@@ -9,7 +9,7 @@ from waflib.extras import autowaf as autowaf
import waflib.Logs as Logs, waflib.Options as Options
# Version of this package (even if built as a child)
-SERD_VERSION = '0.14.0'
+SERD_VERSION = '0.15.0'
SERD_MAJOR_VERSION = '0'
# Library version (UNIX style major, minor, micro)
@@ -389,6 +389,8 @@ def test(ctx):
flags += ' -f'
if (num % 3 == 0):
flags += ' -r http://www.w3.org/'
+ if (num % 7 == 0):
+ flags += ' -e'
base_uri = 'http://www.w3.org/2001/sw/DataAccess/df1/' + test.replace('\\', '/')
out_filename = test + '.thru'
commands += [