about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2017-04-14 10:51:46 +0200
committer David Robillard <d@drobilla.net> 2017-04-14 20:58:03 +0200
commit 1ae793464e0cfb71dedc9807d97132fdc6145005 (patch)
tree 5027e4f929ccfb1a3abd488d5f3498f0cc3fe850
parent a5005d0e75bc2ea9f453d5e35f611f078f200b8b (diff)
download serd-1ae793464e0cfb71dedc9807d97132fdc6145005.tar.gz
serd-1ae793464e0cfb71dedc9807d97132fdc6145005.tar.bz2
serd-1ae793464e0cfb71dedc9807d97132fdc6145005.zip
Factor out file reading from reader
-rw-r--r-- NEWS 6
-rw-r--r-- serd/serd.h 52
-rw-r--r-- src/byte_source.c 107
-rw-r--r-- src/reader.c 177
-rw-r--r-- src/serd_internal.h 46
-rw-r--r-- wscript 3
6 files changed, 307 insertions, 84 deletions
diff --git a/NEWS b/NEWS
index 44a6f9cb..5e8047a4 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,9 @@
+serd (0.27.0) unstable;
+
+ * Add support for reading from a user provided callback
+
+ -- David Robillard <d@drobilla.net> Fri, 14 Apr 2017 20:30:51 +0200
+
serd (0.26.0) stable;
* Add support for TriG and NQuads
diff --git a/serd/serd.h b/serd/serd.h
index e7f6ec3b..0ba3dffd 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -377,6 +377,32 @@ void
serd_uri_resolve(const SerdURI* uri, const SerdURI* base, SerdURI* out);
/**
+ Function to detect I/O stream errors.
+
+ Identical semantics to `ferror`.
+
+ @return Non-zero if `stream` has encountered an error.
+*/
+typedef int (*SerdStreamErrorFunc)(void* stream);
+
+/**
+ Source function for raw string input.
+
+ Identical semantics to `fread`, but may set errno for more informative error
+ reporting than supported by SerdStreamErrorFunc.
+
+ @param buf Output buffer.
+ @param size Size of a single element of data in bytes (always 1).
+ @param nmemb Number of elements to read.
+ @param stream Stream to read from (FILE* for fread).
+ @return Number of elements (bytes) read.
+*/
+typedef size_t (*SerdSource)(void* buf,
+ size_t size,
+ size_t nmemb,
+ void* stream);
+
+/**
Sink function for raw string output.
*/
typedef size_t (*SerdSink)(const void* buf, size_t len, void* stream);
@@ -799,6 +825,21 @@ serd_reader_start_stream(SerdReader* me,
bool bulk);
/**
+ Start an incremental read from a user-specified source.
+
+ Iff `bulk` is true, `source` will be read a page at a time. Otherwise,
+ `source` is guaranteed to only be called for single bytes.
+*/
+SERD_API
+SerdStatus
+serd_reader_start_source_stream(SerdReader* me,
+ SerdSource read_func,
+ SerdStreamErrorFunc error_func,
+ void* stream,
+ const uint8_t* name,
+ bool bulk);
+
+/**
Read a single "chunk" of data during an incremental read.
This function will read a single top level description, and return. This
@@ -827,6 +868,17 @@ serd_reader_read_file_handle(SerdReader* reader,
const uint8_t* name);
/**
+ Read a user-specified byte source.
+*/
+SERD_API
+SerdStatus
+serd_reader_read_source(SerdReader* reader,
+ SerdSource source,
+ SerdStreamErrorFunc error,
+ void* stream,
+ const uint8_t* name);
+
+/**
Read `utf8`.
*/
SERD_API
diff --git a/src/byte_source.c b/src/byte_source.c
new file mode 100644
index 00000000..e5bb47cf
--- /dev/null
+++ b/src/byte_source.c
@@ -0,0 +1,107 @@
+/*
+ Copyright 2011-2017 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "serd_internal.h"
+
+static inline SerdStatus
+serd_byte_source_page(SerdByteSource* source)
+{
+ source->read_head = 0;
+ size_t n_read = source->read_func(
+ source->file_buf, 1, SERD_PAGE_SIZE, source->stream);
+ if (n_read == 0) {
+ source->file_buf[0] = '\0';
+ return (source->error_func(source->stream)
+ ? SERD_ERR_UNKNOWN : SERD_FAILURE);
+ } else if (n_read < SERD_PAGE_SIZE) {
+ source->file_buf[n_read] = '\0';
+ }
+ return SERD_SUCCESS;
+}
+
+SerdStatus
+serd_byte_source_open_source(SerdByteSource* source,
+ SerdSource read_func,
+ SerdStreamErrorFunc error_func,
+ void* stream,
+ bool bulk)
+{
+ memset(source, '\0', sizeof(*source));
+ source->stream = stream;
+ source->from_stream = true;
+ source->paging = bulk;
+ source->error_func = error_func;
+ source->read_func = read_func;
+
+ if (bulk) {
+ source->file_buf = (uint8_t*)serd_bufalloc(SERD_PAGE_SIZE);
+ source->read_buf = source->file_buf;
+ memset(source->file_buf, '\0', SERD_PAGE_SIZE);
+ } else {
+ source->read_buf = &source->read_byte;
+ }
+
+ return SERD_SUCCESS;
+}
+
+SerdStatus
+serd_byte_source_prepare(SerdByteSource* source)
+{
+ if (!source->prepared) {
+ source->prepared = true;
+ if (source->paging) {
+ return serd_byte_source_page(source);
+ } else if (source->from_stream) {
+ return serd_byte_source_advance(source);
+ }
+ }
+ return SERD_SUCCESS;
+}
+
+SerdStatus
+serd_byte_source_open_string(SerdByteSource* source, const uint8_t* utf8)
+{
+ memset(source, '\0', sizeof(*source));
+ source->read_buf = utf8;
+ source->prepared = true;
+ return SERD_SUCCESS;
+}
+
+SerdStatus
+serd_byte_source_close(SerdByteSource* source)
+{
+ if (source->paging) {
+ free(source->file_buf);
+ }
+ memset(source, '\0', sizeof(*source));
+ return SERD_SUCCESS;
+}
+
+SerdStatus
+serd_byte_source_advance(SerdByteSource* source)
+{
+ SerdStatus st = SERD_SUCCESS;
+ if (source->from_stream && !source->paging) {
+ if (source->read_func(&source->read_byte, 1, 1, source->stream) == 0) {
+ return (source->error_func(source->stream)
+ ? SERD_ERR_UNKNOWN : SERD_FAILURE);
+ }
+ } else if (++source->read_head == SERD_PAGE_SIZE && source->paging) {
+ st = serd_byte_source_page(source);
+ }
+
+ return st;
+}
diff --git a/src/reader.c b/src/reader.c
index 11353a13..6b99ebf0 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -74,7 +74,7 @@ struct SerdReaderImpl {
Ref rdf_rest;
Ref rdf_nil;
SerdNode default_graph;
- FILE* fd;
+ SerdByteSource source;
SerdStack stack;
SerdSyntax syntax;
unsigned next_id;
@@ -83,12 +83,6 @@ struct SerdReaderImpl {
uint8_t* buf;
uint8_t* bprefix;
size_t bprefix_len;
- const uint8_t* read_buf;
- uint8_t* file_buf;
- int32_t read_head; ///< Offset into read_buf
- uint8_t read_byte; ///< 1-byte 'buffer' used when not paging
- bool from_file; ///< True iff reading from `fd`
- bool paging; ///< True iff reading a page at a time
bool strict; ///< True iff strict parsing
bool eof;
bool seen_genid;
@@ -123,25 +117,23 @@ r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...)
return 0;
}
-static inline SerdStatus
-page(SerdReader* reader)
+/** fread-like wrapper for getc (which is faster). */
+static size_t
+serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream)
{
- reader->read_head = 0;
- size_t n_read = fread(reader->file_buf, 1, SERD_PAGE_SIZE, reader->fd);
- if (n_read == 0) {
- reader->file_buf[0] = '\0';
- reader->eof = true;
- return ferror(reader->fd) ? SERD_ERR_UNKNOWN : SERD_FAILURE;
- } else if (n_read < SERD_PAGE_SIZE) {
- reader->file_buf[n_read] = '\0';
+ const int c = getc((FILE*)stream);
+ if (c == EOF) {
+ *((uint8_t*)buf) = 0;
+ return 0;
}
- return SERD_SUCCESS;
+ *((uint8_t*)buf) = (uint8_t)c;
+ return 1;
}
static inline uint8_t
peek_byte(SerdReader* reader)
{
- return reader->read_buf[reader->read_head];
+ return serd_byte_source_peek(&reader->source);
}
static inline uint8_t
@@ -154,19 +146,7 @@ eat_byte_safe(SerdReader* reader, const uint8_t byte)
default: ++reader->cur.col;
}
- if (reader->from_file && !reader->paging) {
- const int c = fgetc(reader->fd);
- reader->read_byte = (c == EOF) ? 0 : (uint8_t)c;
- if (c == EOF) {
- reader->eof = true;
- }
- } else if (++reader->read_head == SERD_PAGE_SIZE && reader->paging) {
- const SerdStatus st = page(reader);
- if (st > SERD_FAILURE) {
- reader->status = st;
- r_err(reader, st, "read error: %s\n", strerror(errno));
- }
- }
+ reader->status = serd_byte_source_advance(&reader->source);
return byte;
}
@@ -1827,13 +1807,18 @@ serd_reader_read_file(SerdReader* reader,
return ret;
}
-static void
+static bool
skip_bom(SerdReader* me)
{
- const uint8_t* const b = me->read_buf;
- if (me->paging && b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) {
- me->read_head += 3;
+ if (peek_byte(me) == 0xEF) {
+ eat_byte_safe(me, 0xEF);
+ if (eat_byte_check(me, 0xBB) != 0xBB ||
+ eat_byte_check(me, 0xBF) != 0xBF) {
+ return r_err(me, SERD_ERR_BAD_SYNTAX, "corrupt byte order mark\n");
+ }
}
+
+ return true;
}
SERD_API
@@ -1843,43 +1828,56 @@ serd_reader_start_stream(SerdReader* me,
const uint8_t* name,
bool bulk)
{
+ return serd_reader_start_source_stream(
+ me,
+ bulk ? (SerdSource)fread : serd_file_read_byte,
+ (SerdStreamErrorFunc)ferror,
+ file,
+ name,
+ bulk);
+}
+
+SERD_API
+SerdStatus
+serd_reader_start_source_stream(SerdReader* me,
+ SerdSource read_func,
+ SerdStreamErrorFunc error_func,
+ void* stream,
+ const uint8_t* name,
+ bool bulk)
+{
const Cursor cur = { name, 1, 1 };
- me->fd = file;
- me->read_head = 0;
- me->cur = cur;
- me->from_file = true;
- me->eof = false;
- me->paging = bulk;
-
- if (bulk) {
- me->file_buf = (uint8_t*)serd_bufalloc(SERD_PAGE_SIZE);
- me->read_buf = me->file_buf;
- memset(me->file_buf, '\0', SERD_PAGE_SIZE);
- SerdStatus st = page(me);
- if (st) {
- serd_reader_end_stream(me);
- return st;
- }
- skip_bom(me);
- } else {
- me->read_buf = &me->read_byte;
- me->read_byte = 0; // Don't read to avoid potentially blocking
- }
+ me->cur = cur;
- return SERD_SUCCESS;
+ return serd_byte_source_open_source(
+ &me->source, read_func, error_func, stream, bulk);
+}
+
+static SerdStatus
+serd_reader_prepare(SerdReader* me)
+{
+ me->eof = false;
+ if ((me->status = serd_byte_source_prepare(&me->source))) {
+ r_err(me, me->status, "read error: %s\n", strerror(errno));
+ } else if (!skip_bom(me)) {
+ me->status = SERD_ERR_BAD_SYNTAX;
+ }
+ return me->status;
}
SERD_API
SerdStatus
serd_reader_read_chunk(SerdReader* me)
{
- if (!me->read_byte) {
- // Read initial byte
- const int c = fgetc(me->fd);
- me->read_byte = (c == EOF) ? 0 : (uint8_t)c;
- if (c == EOF) {
- me->eof = true;
- return SERD_FAILURE;
+ SerdStatus st = SERD_SUCCESS;
+ if (!me->source.prepared) {
+ if ((st = serd_reader_prepare(me))) {
+ return st;
+ }
+ } else if (me->eof) {
+ me->eof = false;
+ if ((st = serd_byte_source_advance(&me->source))) {
+ return st;
}
}
return read_statement(me) ? SERD_SUCCESS : SERD_FAILURE;
@@ -1889,24 +1887,37 @@ SERD_API
SerdStatus
serd_reader_end_stream(SerdReader* me)
{
- if (me->paging) {
- free(me->file_buf);
- }
- me->fd = NULL;
- me->read_buf = me->file_buf = NULL;
- return SERD_SUCCESS;
+ return serd_byte_source_close(&me->source);
}
SERD_API
SerdStatus
serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
{
- SerdStatus st = serd_reader_start_stream(me, file, name, true);
- if (!st) {
- st = read_doc(me) ? SERD_SUCCESS : SERD_ERR_UNKNOWN;
+ return serd_reader_read_source(
+ me, (SerdSource)fread, (SerdStreamErrorFunc)ferror, file, name);
+}
+
+SERD_API
+SerdStatus
+serd_reader_read_source(SerdReader* me,
+ SerdSource source,
+ SerdStreamErrorFunc error,
+ void* stream,
+ const uint8_t* name)
+{
+ SerdStatus st = serd_reader_start_source_stream(
+ me, source, error, stream, name, true);
+
+ if ((st = serd_reader_prepare(me))) {
serd_reader_end_stream(me);
+ return st;
+ } else if (!read_doc(me)) {
+ serd_reader_end_stream(me);
+ return SERD_ERR_UNKNOWN;
}
- return st;
+
+ return serd_reader_end_stream(me);
}
SERD_API
@@ -1915,16 +1926,16 @@ serd_reader_read_string(SerdReader* me, const uint8_t* utf8)
{
const Cursor cur = { (const uint8_t*)"(string)", 1, 1 };
- me->read_buf = utf8;
- me->read_head = 0;
+ serd_byte_source_open_string(&me->source, utf8);
me->cur = cur;
- me->from_file = false;
- me->paging = false;
me->eof = false;
- skip_bom(me);
- const bool ret = read_doc(me);
+ SerdStatus st = serd_reader_prepare(me);
+ if (!st) {
+ st = read_doc(me) ? SERD_SUCCESS : SERD_ERR_UNKNOWN;
+ }
+
+ serd_byte_source_close(&me->source);
- me->read_buf = NULL;
- return ret ? SERD_SUCCESS : SERD_ERR_UNKNOWN;
+ return st;
}
diff --git a/src/serd_internal.h b/src/serd_internal.h
index b942072c..ef83f03c 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -66,6 +66,52 @@ serd_bufalloc(size_t size)
#endif
}
+/* Byte source */
+
+typedef struct {
+ SerdSource read_func; ///< Read function (e.g. fread)
+ SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror)
+ void* stream; ///< Stream (e.g. FILE)
+ uint8_t* file_buf; ///< Buffer iff reading pages from a file
+ const uint8_t* read_buf; ///< Pointer to file_buf or read_byte
+ size_t read_head; ///< Offset into read_buf
+ uint8_t read_byte; ///< 1-byte 'buffer' used when not paging
+ bool from_stream; ///< True iff reading from `stream`
+ bool paging; ///< True iff reading a page at a time
+ bool prepared; ///< True iff prepared for reading
+} SerdByteSource;
+
+SerdStatus
+serd_byte_source_open_file(SerdByteSource* source,
+ FILE* file,
+ bool bulk);
+
+SerdStatus
+serd_byte_source_open_string(SerdByteSource* source, const uint8_t* utf8);
+
+SerdStatus
+serd_byte_source_open_source(SerdByteSource* source,
+ SerdSource read_func,
+ SerdStreamErrorFunc error_func,
+ void* stream,
+ bool bulk);
+
+SerdStatus
+serd_byte_source_close(SerdByteSource* source);
+
+SerdStatus
+serd_byte_source_prepare(SerdByteSource* source);
+
+static inline uint8_t
+serd_byte_source_peek(SerdByteSource* source)
+{
+ assert(source->prepared);
+ return source->read_buf[source->read_head];
+}
+
+SerdStatus
+serd_byte_source_advance(SerdByteSource* source);
+
/* Stack */
/** A dynamic stack in memory. */
diff --git a/wscript b/wscript
index 738c66af..60970e69 100644
--- a/wscript
+++ b/wscript
@@ -11,7 +11,7 @@ import waflib.extras.autowaf as autowaf
# major increment <=> incompatible changes
# minor increment <=> compatible changes (additions)
# micro increment <=> no interface changes
-SERD_VERSION = '0.26.0'
+SERD_VERSION = '0.27.0'
SERD_MAJOR_VERSION = '0'
# Mandatory waf variables
@@ -87,6 +87,7 @@ def configure(conf):
print('')
lib_source = [
+ 'src/byte_source.c',
'src/env.c',
'src/node.c',
'src/reader.c',