aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2018-02-10 13:32:06 +0100
committerDavid Robillard <d@drobilla.net>2018-05-27 18:21:57 +0200
commit6042bd82cad4fbe4493218a8f67e7e6904ed81d5 (patch)
tree6a7a23ef81d4117fad118738a49f6af435160b79
parent0dc150a9e5dbfed959041e46a1f1310f6bcc9604 (diff)
downloadserd-6042bd82cad4fbe4493218a8f67e7e6904ed81d5.tar.gz
serd-6042bd82cad4fbe4493218a8f67e7e6904ed81d5.tar.bz2
serd-6042bd82cad4fbe4493218a8f67e7e6904ed81d5.zip
Simplify reader interface
-rw-r--r--serd/serd.h54
-rw-r--r--src/n3.c28
-rw-r--r--src/reader.c116
-rw-r--r--src/serd_internal.h4
-rw-r--r--src/serdi.c33
5 files changed, 99 insertions, 136 deletions
diff --git a/serd/serd.h b/serd/serd.h
index c8aabb8d..3e2bc527 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -838,34 +838,26 @@ serd_reader_read_file(SerdReader* reader,
const char* uri);
/**
- Start an incremental read from a file handle.
+ Prepare to read from a stream.
- Iff `bulk` is true, `file` will be read a page at a time. This is more
- efficient, but uses a page of memory and means that an entire page of input
- must be ready before any callbacks will fire. To react as soon as input
- arrives, set `bulk` to false.
+ The `read_func` is guaranteed to only be called for `page_size` elements
+ with size 1 (i.e. `page_size` bytes).
*/
SERD_API
SerdStatus
-serd_reader_start_stream(SerdReader* reader,
- FILE* file,
- const char* name,
- bool bulk);
+serd_reader_start_stream(SerdReader* reader,
+ SerdSource read_func,
+ SerdStreamErrorFunc error_func,
+ void* stream,
+ const char* name,
+ size_t page_size);
/**
- Start an incremental read from a user-specified source.
-
- The `read_func` is guaranteed to only be called for `page_size` elements
- with size 1 (i.e. `page_size` bytes).
+ Prepare to read from a string.
*/
SERD_API
SerdStatus
-serd_reader_start_source_stream(SerdReader* reader,
- SerdSource read_func,
- SerdStreamErrorFunc error_func,
- void* stream,
- const char* name,
- size_t page_size);
+serd_reader_start_string(SerdReader* reader, const char* utf8);
/**
Read a single "chunk" of data during an incremental read.
@@ -880,32 +872,22 @@ SerdStatus
serd_reader_read_chunk(SerdReader* reader);
/**
- Finish an incremental read from a file handle.
-*/
-SERD_API
-SerdStatus
-serd_reader_end_stream(SerdReader* reader);
+ Read a complete document from the source.
-/**
- Read `file`.
+ This function will continue pulling from the source until a complete
+ document has been read. Note that this may block when used with streams;
+ for incremental reading, use serd_reader_read_chunk() instead.
*/
SERD_API
SerdStatus
-serd_reader_read_file_handle(SerdReader* reader,
- FILE* file,
- const char* name);
+serd_reader_read_document(SerdReader* reader);
/**
- Read a user-specified byte source.
+ Finish reading from the source.
*/
SERD_API
SerdStatus
-serd_reader_read_source(SerdReader* reader,
- SerdSource source,
- SerdStreamErrorFunc error,
- void* stream,
- const char* name,
- size_t page_size);
+serd_reader_end_stream(SerdReader* reader);
/**
Read `utf8`.
diff --git a/src/n3.c b/src/n3.c
index 9c7784e1..b65bf1a5 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -1430,22 +1430,22 @@ skip_until(SerdReader* reader, uint8_t byte)
}
}
-bool
+SerdStatus
read_turtleTrigDoc(SerdReader* reader)
{
while (!reader->source.eof) {
if (!read_n3_statement(reader)) {
if (reader->strict) {
- return 0;
+ return SERD_ERR_UNKNOWN;
}
skip_until(reader, '\n');
reader->status = SERD_SUCCESS;
}
}
- return reader->status <= SERD_FAILURE;
+ return reader->status;
}
-bool
+SerdStatus
read_nquadsDoc(SerdReader* reader)
{
while (!reader->source.eof) {
@@ -1458,8 +1458,9 @@ read_nquadsDoc(SerdReader* reader)
reader->source.eof = true;
break;
} else if (peek_byte(reader) == '@') {
- return r_err(reader, SERD_ERR_BAD_SYNTAX,
- "syntax does not support directives\n");
+ r_err(reader, SERD_ERR_BAD_SYNTAX,
+ "syntax does not support directives\n");
+ return SERD_ERR_BAD_SYNTAX;
}
// subject predicate object
@@ -1468,11 +1469,11 @@ read_nquadsDoc(SerdReader* reader)
!(ctx.predicate = read_IRIREF(reader)) ||
!read_ws_star(reader) ||
!read_object(reader, &ctx, false, &ate_dot)) {
- return false;
+ return SERD_ERR_UNKNOWN;
}
if (!ate_dot) { // graphLabel?
- TRY_RET(read_ws_star(reader));
+ read_ws_star(reader);
switch (peek_byte(reader)) {
case '.':
break;
@@ -1481,20 +1482,23 @@ read_nquadsDoc(SerdReader* reader)
break;
default:
if (!(ctx.graph = read_IRIREF(reader))) {
- return false;
+ return SERD_ERR_UNKNOWN;
}
}
// Terminating '.'
- TRY_RET(read_ws_star(reader));
+ read_ws_star(reader);
eat_byte_check(reader, '.');
}
- TRY_RET(emit_statement(reader, ctx, ctx.object, ctx.datatype, ctx.lang));
+ if (!emit_statement(reader, ctx, ctx.object, ctx.datatype, ctx.lang)) {
+ break;
+ }
+
pop_node(reader, ctx.graph);
pop_node(reader, ctx.lang);
pop_node(reader, ctx.datatype);
pop_node(reader, ctx.object);
}
- return reader->status <= SERD_FAILURE;
+ return reader->status;
}
diff --git a/src/reader.c b/src/reader.c
index cedf84cf..8c43f80a 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -24,6 +24,8 @@
#include <stdlib.h>
#include <string.h>
+static SerdStatus serd_reader_prepare(SerdReader* reader);
+
int
r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...)
{
@@ -59,19 +61,6 @@ blank_id(SerdReader* reader)
return ref;
}
-/** fread-like wrapper for getc (which is faster). */
-static size_t
-serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream)
-{
- const int c = getc((FILE*)stream);
- if (c == EOF) {
- *((uint8_t*)buf) = 0;
- return 0;
- }
- *((uint8_t*)buf) = (uint8_t)c;
- return 1;
-}
-
Ref
push_node_padded(SerdReader* reader, size_t maxlen,
SerdType type, const char* str, size_t n_bytes)
@@ -147,9 +136,17 @@ read_statement(SerdReader* reader)
}
}
-static bool
-read_doc(SerdReader* reader)
+SERD_API
+SerdStatus
+serd_reader_read_document(SerdReader* reader)
{
+ if (!reader->source.prepared) {
+ SerdStatus st = serd_reader_prepare(reader);
+ if (st) {
+ return st;
+ }
+ }
+
switch (reader->syntax) {
case SERD_NQUADS: return read_nquadsDoc(reader);
default: return read_turtleTrigDoc(reader);
@@ -270,10 +267,20 @@ serd_reader_read_file(SerdReader* reader,
return SERD_ERR_UNKNOWN;
}
- SerdStatus ret = serd_reader_read_file_handle(reader, fd, path);
+ SerdStatus st = serd_reader_start_stream(
+ reader, (SerdSource)fread, (SerdStreamErrorFunc)ferror,
+ fd, path, SERD_PAGE_SIZE);
+
+ if (!st) {
+ st = serd_reader_read_document(reader);
+ }
+
+ const SerdStatus est = serd_reader_end_stream(reader);
+
fclose(fd);
free(path);
- return ret;
+
+ return st ? st : est;
}
static SerdStatus
@@ -295,31 +302,22 @@ skip_bom(SerdReader* me)
SERD_API
SerdStatus
-serd_reader_start_stream(SerdReader* reader,
- FILE* file,
- const char* name,
- bool bulk)
+serd_reader_start_stream(SerdReader* reader,
+ SerdSource read_func,
+ SerdStreamErrorFunc error_func,
+ void* stream,
+ const char* name,
+ size_t page_size)
{
- return serd_reader_start_source_stream(
- reader,
- bulk ? (SerdSource)fread : serd_file_read_byte,
- (SerdStreamErrorFunc)ferror,
- file,
- name,
- bulk ? SERD_PAGE_SIZE : 1);
+ return serd_byte_source_open_source(
+ &reader->source, read_func, error_func, stream, name, page_size);
}
SERD_API
SerdStatus
-serd_reader_start_source_stream(SerdReader* reader,
- SerdSource read_func,
- SerdStreamErrorFunc error_func,
- void* stream,
- const char* name,
- size_t page_size)
+serd_reader_start_string(SerdReader* reader, const char* utf8)
{
- return serd_byte_source_open_source(
- &reader->source, read_func, error_func, stream, name, page_size);
+ return serd_byte_source_open_string(&reader->source, utf8);
}
static SerdStatus
@@ -363,50 +361,12 @@ serd_reader_end_stream(SerdReader* reader)
SERD_API
SerdStatus
-serd_reader_read_file_handle(SerdReader* reader,
- FILE* file,
- const char* name)
-{
- return serd_reader_read_source(
- reader, (SerdSource)fread, (SerdStreamErrorFunc)ferror,
- file, name, SERD_PAGE_SIZE);
-}
-
-SERD_API
-SerdStatus
-serd_reader_read_source(SerdReader* reader,
- SerdSource source,
- SerdStreamErrorFunc error,
- void* stream,
- const char* name,
- size_t page_size)
-{
- SerdStatus st = serd_reader_start_source_stream(
- reader, source, error, stream, name, page_size);
-
- if (st || (st = serd_reader_prepare(reader))) {
- serd_reader_end_stream(reader);
- return st;
- } else if (!read_doc(reader)) {
- serd_reader_end_stream(reader);
- return SERD_ERR_UNKNOWN;
- }
-
- return serd_reader_end_stream(reader);
-}
-
-SERD_API
-SerdStatus
serd_reader_read_string(SerdReader* reader, const char* utf8)
{
- serd_byte_source_open_string(&reader->source, utf8);
-
- SerdStatus st = serd_reader_prepare(reader);
- if (!st) {
- st = read_doc(reader) ? SERD_SUCCESS : SERD_ERR_UNKNOWN;
- }
+ SerdStatus st = serd_reader_start_string(reader, utf8);
- serd_byte_source_close(&reader->source);
+ st = st ? st : serd_reader_read_document(reader); // skip read if start failed
+ const SerdStatus est = serd_byte_source_close(&reader->source);
- return st;
+ return st ? st : est;
}
diff --git a/src/serd_internal.h b/src/serd_internal.h
index 5be89a12..6e0d7a8c 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -569,8 +569,8 @@ Ref pop_node(SerdReader* reader, Ref ref);
bool emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l);
bool read_n3_statement(SerdReader* reader);
-bool read_nquadsDoc(SerdReader* reader);
-bool read_turtleTrigDoc(SerdReader* reader);
+SerdStatus read_nquadsDoc(SerdReader* reader);
+SerdStatus read_turtleTrigDoc(SerdReader* reader);
typedef enum {
FIELD_NONE,
diff --git a/src/serdi.c b/src/serdi.c
index ec66cfd3..caa1d9af 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -113,6 +113,19 @@ quiet_error_sink(void* handle, const SerdError* e)
return SERD_SUCCESS;
}
+/** fread-like wrapper for getc (which is faster). */
+static size_t
+serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream)
+{
+ const int c = getc((FILE*)stream);
+ if (c == EOF) {
+ *((uint8_t*)buf) = 0;
+ return 0;
+ }
+ *((uint8_t*)buf) = (uint8_t)c;
+ return 1;
+}
+
int
main(int argc, char** argv)
{
@@ -282,17 +295,21 @@ main(int argc, char** argv)
SerdStatus status = SERD_SUCCESS;
if (!from_file) {
- status = serd_reader_read_string(reader, input);
- } else if (bulk_read) {
- status = serd_reader_read_file_handle(reader, in_fd, in_name);
+ status = serd_reader_start_string(reader, input);
} else {
- status = serd_reader_start_stream(reader, in_fd, in_name, false);
- while (!status) {
- status = serd_reader_read_chunk(reader);
- }
- serd_reader_end_stream(reader);
+ status = serd_reader_start_stream(
+ reader,
+ bulk_read ? (SerdSource)fread : serd_file_read_byte,
+ (SerdStreamErrorFunc)ferror,
+ in_fd,
+ in_name,
+ bulk_read ? SERD_PAGE_SIZE : 1);
}
+ status = status ? status : serd_reader_read_document(reader); // keep start error
+
+ serd_reader_end_stream(reader);
+
serd_reader_free(reader);
serd_writer_finish(writer);
serd_writer_free(writer);