about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2020-08-16 12:42:58 +0200
committer David Robillard <d@drobilla.net> 2021-03-07 15:32:24 -0500
commit 9a8e06aa5bdc62ed589bd8ed5789bd059cec0700 (patch)
tree ba5263262fad84c5f58bd60f3c6f6fcba30d6516
parent d4ae57afbd7da668dbf89f7b7e66e2f064437a98 (diff)
download serd-9a8e06aa5bdc62ed589bd8ed5789bd059cec0700.tar.gz
serd-9a8e06aa5bdc62ed589bd8ed5789bd059cec0700.tar.bz2
serd-9a8e06aa5bdc62ed589bd8ed5789bd059cec0700.zip
Simplify reader interface
-rw-r--r-- include/serd/serd.h 60
-rw-r--r-- src/n3.c 3
-rw-r--r-- src/reader.c 124
-rw-r--r-- src/serdi.c 39
-rw-r--r-- test/test_read_chunk.c 38
-rw-r--r-- test/test_reader_writer.c 19
6 files changed, 121 insertions, 162 deletions
diff --git a/include/serd/serd.h b/include/serd/serd.h
index c2c9d52d..000a8ef8 100644
--- a/include/serd/serd.h
+++ b/include/serd/serd.h
@@ -23,7 +23,6 @@
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
-#include <stdio.h>
#if defined(_WIN32) && !defined(SERD_STATIC) && defined(SERD_INTERNAL)
# define SERD_API __declspec(dllexport)
@@ -877,34 +876,25 @@ serd_reader_read_file(SerdReader* SERD_NONNULL reader,
const char* SERD_NONNULL uri);
/**
- Start an incremental read from a file handle.
-
- Iff `bulk` is true, `file` will be read a page at a time. This is more
- efficient, but uses a page of memory and means that an entire page of input
- must be ready before any callbacks will fire. To react as soon as input
- arrives, set `bulk` to false.
-*/
-SERD_API
-SerdStatus
-serd_reader_start_stream(SerdReader* SERD_NONNULL reader,
- FILE* SERD_NONNULL file,
- const char* SERD_NULLABLE name,
- bool bulk);
-
-/**
- Start an incremental read from a user-specified source.
+ Prepare to read from a stream.
The `read_func` is guaranteed to only be called for `page_size` elements
with size 1 (i.e. `page_size` bytes).
*/
SERD_API
SerdStatus
-serd_reader_start_source_stream(SerdReader* SERD_NONNULL reader,
- SerdSource SERD_NONNULL read_func,
- SerdStreamErrorFunc SERD_NONNULL error_func,
- void* SERD_NONNULL stream,
- const char* SERD_NULLABLE name,
- size_t page_size);
+serd_reader_start_stream(SerdReader* SERD_NONNULL reader,
+ SerdSource SERD_NONNULL read_func,
+ SerdStreamErrorFunc SERD_NONNULL error_func,
+ void* SERD_NONNULL stream,
+ const char* SERD_NULLABLE name,
+ size_t page_size);
+
+/// Prepare to read from a string
+SERD_API
+SerdStatus
+serd_reader_start_string(SerdReader* SERD_NONNULL reader,
+ const char* SERD_NONNULL utf8);
/**
Read a single "chunk" of data during an incremental read
@@ -918,27 +908,21 @@ SERD_API
SerdStatus
serd_reader_read_chunk(SerdReader* SERD_NONNULL reader);
-/// Finish an incremental read from a file handle
-SERD_API
-SerdStatus
-serd_reader_end_stream(SerdReader* SERD_NONNULL reader);
+/**
+ Read a complete document from the source.
-/// Read `file`
+ This function will continue pulling from the source until a complete
+ document has been read. Note that this may block when used with streams,
+ for incremental reading use serd_reader_read_chunk().
+*/
SERD_API
SerdStatus
-serd_reader_read_file_handle(SerdReader* SERD_NONNULL reader,
- FILE* SERD_NONNULL file,
- const char* SERD_NULLABLE name);
+serd_reader_read_document(SerdReader* SERD_NONNULL reader);
-/// Read a user-specified byte source
+/// Finish reading from the source
SERD_API
SerdStatus
-serd_reader_read_source(SerdReader* SERD_NONNULL reader,
- SerdSource SERD_NONNULL source,
- SerdStreamErrorFunc SERD_NONNULL error,
- void* SERD_NONNULL stream,
- const char* SERD_NULLABLE name,
- size_t page_size);
+serd_reader_end_stream(SerdReader* SERD_NONNULL reader);
/// Read `utf8`
SERD_API
diff --git a/src/n3.c b/src/n3.c
index fe6da47f..4ea6adbe 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -1683,8 +1683,9 @@ read_nquadsDoc(SerdReader* reader)
}
if (peek_byte(reader) == '@') {
- return r_err(
+ r_err(
reader, SERD_ERR_BAD_SYNTAX, "syntax does not support directives\n");
+ return SERD_ERR_BAD_SYNTAX;
}
// subject predicate object
diff --git a/src/reader.c b/src/reader.c
index 6e814d93..194c295b 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -23,11 +23,13 @@
#include <errno.h>
#include <stdarg.h>
-#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+static SerdStatus
+serd_reader_prepare(SerdReader* reader);
+
SerdStatus
r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...)
{
@@ -65,22 +67,6 @@ blank_id(SerdReader* reader)
return ref;
}
-/** fread-like wrapper for getc (which is faster). */
-static size_t
-serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream)
-{
- (void)size;
- (void)nmemb;
-
- const int c = getc((FILE*)stream);
- if (c == EOF) {
- *((uint8_t*)buf) = 0;
- return 0;
- }
- *((uint8_t*)buf) = (uint8_t)c;
- return 1;
-}
-
Ref
push_node_padded(SerdReader* reader,
size_t maxlen,
@@ -165,9 +151,16 @@ read_statement(SerdReader* reader)
return read_n3_statement(reader);
}
-static SerdStatus
-read_doc(SerdReader* reader)
+SerdStatus
+serd_reader_read_document(SerdReader* reader)
{
+ if (!reader->source.prepared) {
+ SerdStatus st = serd_reader_prepare(reader);
+ if (st) {
+ return st;
+ }
+ }
+
return ((reader->syntax == SERD_NQUADS) ? read_nquadsDoc(reader)
: read_turtleTrigDoc(reader));
}
@@ -281,10 +274,23 @@ serd_reader_read_file(SerdReader* reader, const char* uri)
return SERD_ERR_UNKNOWN;
}
- SerdStatus ret = serd_reader_read_file_handle(reader, fd, path);
+ SerdStatus st = serd_reader_start_stream(reader,
+ (SerdSource)fread,
+ (SerdStreamErrorFunc)ferror,
+ fd,
+ path,
+ SERD_PAGE_SIZE);
+
+ if (!st) {
+ st = serd_reader_read_document(reader);
+ }
+
+ const SerdStatus est = serd_reader_end_stream(reader);
+
fclose(fd);
free(path);
- return ret;
+
+ return st ? st : est;
}
static SerdStatus
@@ -305,30 +311,21 @@ skip_bom(SerdReader* me)
}
SerdStatus
-serd_reader_start_stream(SerdReader* reader,
- FILE* file,
- const char* name,
- bool bulk)
+serd_reader_start_stream(SerdReader* reader,
+ SerdSource read_func,
+ SerdStreamErrorFunc error_func,
+ void* stream,
+ const char* name,
+ size_t page_size)
{
- return serd_reader_start_source_stream(reader,
- bulk ? (SerdSource)fread
- : serd_file_read_byte,
- (SerdStreamErrorFunc)ferror,
- file,
- name,
- bulk ? SERD_PAGE_SIZE : 1);
+ return serd_byte_source_open_source(
+ &reader->source, read_func, error_func, stream, name, page_size);
}
SerdStatus
-serd_reader_start_source_stream(SerdReader* reader,
- SerdSource read_func,
- SerdStreamErrorFunc error_func,
- void* stream,
- const char* name,
- size_t page_size)
+serd_reader_start_string(SerdReader* reader, const char* utf8)
{
- return serd_byte_source_open_source(
- &reader->source, read_func, error_func, stream, name, page_size);
+ return serd_byte_source_open_string(&reader->source, utf8);
}
static SerdStatus
@@ -370,51 +367,12 @@ serd_reader_end_stream(SerdReader* reader)
}
SerdStatus
-serd_reader_read_file_handle(SerdReader* reader, FILE* file, const char* name)
-{
- return serd_reader_read_source(reader,
- (SerdSource)fread,
- (SerdStreamErrorFunc)ferror,
- file,
- name,
- SERD_PAGE_SIZE);
-}
-
-SerdStatus
-serd_reader_read_source(SerdReader* reader,
- SerdSource source,
- SerdStreamErrorFunc error,
- void* stream,
- const char* name,
- size_t page_size)
-{
- SerdStatus st = serd_reader_start_source_stream(
- reader, source, error, stream, name, page_size);
-
- if (st || (st = serd_reader_prepare(reader))) {
- serd_reader_end_stream(reader);
- return st;
- }
-
- if ((st = read_doc(reader))) {
- serd_reader_end_stream(reader);
- return st;
- }
-
- return serd_reader_end_stream(reader);
-}
-
-SerdStatus
serd_reader_read_string(SerdReader* reader, const char* utf8)
{
- serd_byte_source_open_string(&reader->source, utf8);
+ serd_reader_start_string(reader, utf8);
- SerdStatus st = serd_reader_prepare(reader);
- if (!st) {
- st = read_doc(reader);
- }
+ const SerdStatus st = serd_reader_read_document(reader);
+ const SerdStatus est = serd_byte_source_close(&reader->source);
- serd_byte_source_close(&reader->source);
-
- return st;
+ return st ? st : est;
}
diff --git a/src/serdi.c b/src/serdi.c
index f0cf8f4b..0e59c5ee 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -35,6 +35,7 @@
#include <errno.h>
#include <stdbool.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -150,6 +151,22 @@ serd_fopen(const char* path, const char* mode)
return fd;
}
+/** fread-like wrapper for getc (which is faster). */
+static size_t
+serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream)
+{
+ (void)size;
+ (void)nmemb;
+
+ const int c = getc((FILE*)stream);
+ if (c == EOF) {
+ *((uint8_t*)buf) = 0;
+ return 0;
+ }
+ *((uint8_t*)buf) = (uint8_t)c;
+ return 1;
+}
+
static SerdWriterFlags
choose_style(const SerdSyntax input_syntax,
const SerdSyntax output_syntax,
@@ -345,17 +362,23 @@ main(int argc, char** argv)
SerdStatus st = SERD_SUCCESS;
if (!from_file) {
- st = serd_reader_read_string(reader, input);
- } else if (bulk_read) {
- st = serd_reader_read_file_handle(reader, in_fd, in_name);
+ st = serd_reader_start_string(reader, input);
} else {
- st = serd_reader_start_stream(reader, in_fd, in_name, false);
- while (!st) {
- st = serd_reader_read_chunk(reader);
- }
- serd_reader_end_stream(reader);
+ st = serd_reader_start_stream(reader,
+ bulk_read ? (SerdSource)fread
+ : serd_file_read_byte,
+ (SerdStreamErrorFunc)ferror,
+ in_fd,
+ in_name,
+ bulk_read ? 4096 : 1);
+ }
+
+ if (!st) {
+ st = serd_reader_read_document(reader);
}
+ serd_reader_end_stream(reader);
+
serd_reader_free(reader);
serd_writer_finish(writer);
serd_writer_free(writer);
diff --git a/test/test_read_chunk.c b/test/test_read_chunk.c
index dd2961ac..76e9b642 100644
--- a/test/test_read_chunk.c
+++ b/test/test_read_chunk.c
@@ -19,8 +19,7 @@
#include "serd/serd.h"
#include <assert.h>
-#include <stdbool.h>
-#include <stdio.h>
+#include <stddef.h>
static size_t n_base = 0;
static size_t n_prefix = 0;
@@ -80,39 +79,32 @@ on_end(void* handle, const SerdNode* node)
int
main(void)
{
- FILE* file = tmpfile();
-
- fprintf(file,
- "@prefix eg: <http://example.org/> .\n"
- "@base <http://example.org/base> .\n"
- "eg:s1 eg:p1 eg:o1 ;\n"
- " eg:p2 eg:o2 ,\n"
- " eg:o3 .\n"
- "eg:s2 eg:p1 eg:o1 ;\n"
- " eg:p2 eg:o2 .\n"
- "eg:s3 eg:p1 eg:o1 .\n"
- "eg:s4 eg:p1 [ eg:p3 eg:o1 ] .\n");
-
- fseek(file, 0, SEEK_SET);
-
SerdReader* reader = serd_reader_new(
SERD_TURTLE, NULL, NULL, on_base, on_prefix, on_statement, on_end);
-
assert(reader);
- assert(!serd_reader_start_stream(reader, file, NULL, true));
+
+ assert(!serd_reader_start_string(reader,
+ "@prefix eg: <http://example.org/> .\n"
+ "@base <http://example.org/base> .\n"
+ "eg:s1 eg:p1 eg:o1 ;\n"
+ " eg:p2 eg:o2 ,\n"
+ " eg:o3 .\n"
+ "eg:s2 eg:p1 eg:o1 ;\n"
+ " eg:p2 eg:o2 .\n"
+ "eg:s3 eg:p1 eg:o1 .\n"
+ "eg:s4 eg:p1 [ eg:p3 eg:o1 ] .\n"));
assert(!serd_reader_read_chunk(reader) && n_prefix == 1);
assert(!serd_reader_read_chunk(reader) && n_base == 1);
assert(!serd_reader_read_chunk(reader) && n_statement == 3);
assert(!serd_reader_read_chunk(reader) && n_statement == 5);
assert(!serd_reader_read_chunk(reader) && n_statement == 6);
- assert(!serd_reader_read_chunk(reader) && n_statement == 8 && n_end == 1);
+ assert(!serd_reader_read_chunk(reader) && n_statement == 8);
assert(serd_reader_read_chunk(reader) == SERD_FAILURE);
+ assert(n_end == 1);
assert(serd_reader_read_chunk(reader) == SERD_FAILURE);
-
assert(!serd_reader_end_stream(reader));
- serd_reader_free(reader);
- fclose(file);
+ serd_reader_free(reader);
return 0;
}
diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c
index 5431462a..48bb508e 100644
--- a/test/test_reader_writer.c
+++ b/test/test_reader_writer.c
@@ -19,7 +19,6 @@
#include "serd/serd.h"
#include <assert.h>
-#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -89,7 +88,8 @@ test_read_chunks(void)
assert(serd_reader_handle(reader) == rt);
assert(f);
- SerdStatus st = serd_reader_start_stream(reader, f, NULL, false);
+ SerdStatus st = serd_reader_start_stream(
+ reader, (SerdSource)fread, (SerdStreamErrorFunc)ferror, f, NULL, 1);
assert(st == SERD_SUCCESS);
// Write two statement separated by null characters
@@ -292,7 +292,8 @@ test_reader(const char* path)
fflush(temp);
fseek(temp, 0L, SEEK_SET);
- serd_reader_start_stream(reader, temp, NULL, true);
+ serd_reader_start_stream(
+ reader, (SerdSource)fread, (SerdStreamErrorFunc)ferror, temp, NULL, 4096);
assert(serd_reader_read_chunk(reader) == SERD_SUCCESS);
assert(serd_reader_read_chunk(reader) == SERD_FAILURE);
@@ -305,12 +306,12 @@ test_reader(const char* path)
// A byte-wise reader that hits EOF once then continues (like a socket)
{
size_t n_reads = 0;
- serd_reader_start_source_stream(reader,
- (SerdSource)eof_test_read,
- (SerdStreamErrorFunc)eof_test_error,
- &n_reads,
- NULL,
- 1);
+ serd_reader_start_stream(reader,
+ (SerdSource)eof_test_read,
+ (SerdStreamErrorFunc)eof_test_error,
+ &n_reads,
+ NULL,
+ 1);
assert(serd_reader_read_chunk(reader) == SERD_SUCCESS);
assert(serd_reader_read_chunk(reader) == SERD_FAILURE);