about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- serd/serd.h 6
-rw-r--r-- src/n3.c 321
-rw-r--r-- src/reader.c 60
-rw-r--r-- src/reader.h 77
-rw-r--r-- src/serdi.c 16
-rw-r--r-- src/stack.h 11
-rw-r--r-- src/string.c 1
-rw-r--r-- src/writer.c 2
-rw-r--r-- tests/read_chunk_test.c 2
-rw-r--r-- tests/serd_test.c 2
10 files changed, 277 insertions, 221 deletions
diff --git a/serd/serd.h b/serd/serd.h
index e7b47693..514fccf4 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -128,7 +128,8 @@ typedef enum {
SERD_ERR_NOT_FOUND, /**< Not found */
SERD_ERR_ID_CLASH, /**< Encountered clashing blank node IDs */
SERD_ERR_BAD_CURIE, /**< Invalid CURIE (e.g. prefix does not exist) */
- SERD_ERR_INTERNAL /**< Unexpected internal error (should not happen) */
+ SERD_ERR_INTERNAL, /**< Unexpected internal error (should not happen) */
+ SERD_ERR_OVERFLOW /**< Stack overflow */
} SerdStatus;
/**
@@ -928,7 +929,8 @@ SERD_API
SerdReader*
serd_reader_new(SerdWorld* world,
SerdSyntax syntax,
- const SerdSinkInterface* sink);
+ const SerdSinkInterface* sink,
+ size_t stack_size);
/**
Enable or disable strict parsing.
diff --git a/src/n3.c b/src/n3.c
index f504fb75..a088813a 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -37,7 +37,7 @@ fancy_syntax(const SerdReader* reader)
}
static bool
-read_collection(SerdReader* reader, ReadContext ctx, Ref* dest);
+read_collection(SerdReader* reader, ReadContext ctx, SerdNode** dest);
static bool
read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot);
@@ -54,8 +54,8 @@ read_HEX(SerdReader* reader)
}
// Read UCHAR escape, initial \ is already eaten by caller
-static inline bool
-read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code)
+static inline SerdStatus
+read_UCHAR(SerdReader* reader, SerdNode* dest, uint32_t* char_code)
{
const uint8_t b = peek_byte(reader);
unsigned length = 0;
@@ -67,14 +67,14 @@ read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code)
length = 4;
break;
default:
- return false;
+ return SERD_ERR_BAD_SYNTAX;
}
eat_byte_safe(reader, b);
uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
for (unsigned i = 0; i < length; ++i) {
if (!(buf[i] = read_HEX(reader))) {
- return false;
+ return SERD_ERR_BAD_SYNTAX;
}
}
@@ -93,9 +93,9 @@ read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code)
} else {
r_err(reader, SERD_ERR_BAD_SYNTAX,
"unicode character 0x%X out of range\n", code);
- push_bytes(reader, dest, replacement_char, 3);
*char_code = 0xFFFD;
- return true;
+ const SerdStatus st = push_bytes(reader, dest, replacement_char, 3);
+ return st ? st : SERD_SUCCESS;
}
// Build output in buf
@@ -118,44 +118,37 @@ read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code)
buf[0] = (uint8_t)c;
}
- push_bytes(reader, dest, buf, size);
*char_code = code;
- return true;
+ return push_bytes(reader, dest, buf, size);
}
// Read ECHAR escape, initial \ is already eaten by caller
-static inline bool
-read_ECHAR(SerdReader* reader, Ref dest, SerdNodeFlags* flags)
+static inline SerdStatus
+read_ECHAR(SerdReader* reader, SerdNode* dest, SerdNodeFlags* flags)
{
const uint8_t c = peek_byte(reader);
switch (c) {
case 't':
eat_byte_safe(reader, 't');
- push_byte(reader, dest, '\t');
- return true;
+ return push_byte(reader, dest, '\t');
case 'b':
eat_byte_safe(reader, 'b');
- push_byte(reader, dest, '\b');
- return true;
+ return push_byte(reader, dest, '\b');
case 'n':
*flags |= SERD_HAS_NEWLINE;
eat_byte_safe(reader, 'n');
- push_byte(reader, dest, '\n');
- return true;
+ return push_byte(reader, dest, '\n');
case 'r':
*flags |= SERD_HAS_NEWLINE;
eat_byte_safe(reader, 'r');
- push_byte(reader, dest, '\r');
- return true;
+ return push_byte(reader, dest, '\r');
case 'f':
eat_byte_safe(reader, 'f');
- push_byte(reader, dest, '\f');
- return true;
+ return push_byte(reader, dest, '\f');
case '\\': case '"': case '\'':
- push_byte(reader, dest, eat_byte_safe(reader, c));
- return true;
+ return push_byte(reader, dest, eat_byte_safe(reader, c));
default:
- return false;
+ return SERD_ERR_BAD_SYNTAX;
}
}
@@ -193,21 +186,21 @@ read_utf8_bytes(SerdReader* reader, uint8_t bytes[4], uint32_t* size, uint8_t c)
}
static SerdStatus
-read_utf8_character(SerdReader* reader, Ref dest, uint8_t c)
+read_utf8_character(SerdReader* reader, SerdNode* dest, uint8_t c)
{
uint32_t size;
uint8_t bytes[4];
SerdStatus st = read_utf8_bytes(reader, bytes, &size, c);
if (st) {
push_bytes(reader, dest, replacement_char, 3);
- } else {
- push_bytes(reader, dest, bytes, size);
+ return st;
}
- return st;
+
+ return push_bytes(reader, dest, bytes, size);
}
static SerdStatus
-read_utf8_code(SerdReader* reader, Ref dest, uint32_t* code, uint8_t c)
+read_utf8_code(SerdReader* reader, SerdNode* dest, uint32_t* code, uint8_t c)
{
uint32_t size;
uint8_t bytes[4];
@@ -217,15 +210,17 @@ read_utf8_code(SerdReader* reader, Ref dest, uint32_t* code, uint8_t c)
return st;
}
- push_bytes(reader, dest, bytes, size);
- *code = parse_counted_utf8_char(bytes, size);
+ if (!(st = push_bytes(reader, dest, bytes, size))) {
+ *code = parse_counted_utf8_char(bytes, size);
+ }
+
return st;
}
// Read one character (possibly multi-byte)
// The first byte, c, has already been eaten by caller
static inline SerdStatus
-read_character(SerdReader* reader, Ref dest, SerdNodeFlags* flags, uint8_t c)
+read_character(SerdReader* reader, SerdNode* dest, SerdNodeFlags* flags, uint8_t c)
{
if (!(c & 0x80)) {
switch (c) {
@@ -236,8 +231,9 @@ read_character(SerdReader* reader, Ref dest, SerdNodeFlags* flags, uint8_t c)
*flags |= SERD_HAS_QUOTE;
break;
}
- push_byte(reader, dest, c);
- return SERD_SUCCESS;
+
+ const SerdStatus st = push_byte(reader, dest, c);
+ return st ? st : SERD_SUCCESS;
}
return read_utf8_character(reader, dest, c);
}
@@ -296,18 +292,19 @@ eat_delim(SerdReader* reader, const char delim)
// STRING_LITERAL_LONG_QUOTE and STRING_LITERAL_LONG_SINGLE_QUOTE
// Initial triple quotes are already eaten by caller
-static Ref
+static SerdNode*
read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
{
- Ref ref = push_node(reader, SERD_LITERAL, "", 0);
+ SerdNode* ref = push_node(reader, SERD_LITERAL, "", 0);
+ SerdStatus st = SERD_SUCCESS;
while (!reader->status) {
const uint8_t c = peek_byte(reader);
uint32_t code;
switch (c) {
case '\\':
eat_byte_safe(reader, c);
- if (!read_ECHAR(reader, ref, flags) &&
- !read_UCHAR(reader, ref, &code)) {
+ if (read_ECHAR(reader, ref, flags) &&
+ read_UCHAR(reader, ref, &code)) {
r_err(reader, SERD_ERR_BAD_SYNTAX,
"invalid escape `\\%c'\n", peek_byte(reader));
return pop_node(reader, ref);
@@ -324,21 +321,22 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
}
*flags |= SERD_HAS_QUOTE;
push_byte(reader, ref, c);
- read_character(reader, ref, flags, q2);
+ st = read_character(reader, ref, flags, q2);
} else {
- read_character(reader, ref, flags, eat_byte_safe(reader, c));
+ st = read_character(reader, ref, flags, eat_byte_safe(reader, c));
}
}
}
- return ref;
+ return st ? NULL : ref;
}
// STRING_LITERAL_QUOTE and STRING_LITERAL_SINGLE_QUOTE
// Initial quote is already eaten by caller
-static Ref
+static SerdNode*
read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
{
- Ref ref = push_node(reader, SERD_LITERAL, "", 0);
+ SerdNode* ref = push_node(reader, SERD_LITERAL, "", 0);
+ SerdStatus st = SERD_SUCCESS;
while (!reader->status) {
const uint8_t c = peek_byte(reader);
uint32_t code = 0;
@@ -348,8 +346,8 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
return pop_node(reader, ref);
case '\\':
eat_byte_safe(reader, c);
- if (!read_ECHAR(reader, ref, flags) &&
- !read_UCHAR(reader, ref, &code)) {
+ if (read_ECHAR(reader, ref, flags) &&
+ read_UCHAR(reader, ref, &code)) {
r_err(reader, SERD_ERR_BAD_SYNTAX,
"invalid escape `\\%c'\n", peek_byte(reader));
return pop_node(reader, ref);
@@ -360,15 +358,24 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
eat_byte_check(reader, q);
return ref;
} else {
- read_character(reader, ref, flags, eat_byte_safe(reader, c));
+ st = read_character(reader, ref, flags, eat_byte_safe(reader, c));
}
}
}
- eat_byte_check(reader, q);
+
+ if (st) {
+ reader->status = st;
+ return NULL;
+ }
+
+ if (!eat_byte_check(reader, q)) {
+ return pop_node(reader, ref);
+ }
+
return ref;
}
-static Ref
+static SerdNode*
read_String(SerdReader* reader, SerdNodeFlags* flags)
{
const uint8_t q1 = peek_byte(reader);
@@ -386,8 +393,9 @@ read_String(SerdReader* reader, SerdNodeFlags* flags)
}
if (!fancy_syntax(reader)) {
- return r_err(reader, SERD_ERR_BAD_SYNTAX,
- "syntax does not support long literals\n");
+ r_err(reader, SERD_ERR_BAD_SYNTAX,
+ "syntax does not support long literals\n");
+ return NULL;
}
eat_byte_safe(reader, q3);
@@ -406,7 +414,7 @@ is_PN_CHARS_BASE(const uint32_t c)
}
static SerdStatus
-read_PN_CHARS_BASE(SerdReader* reader, Ref dest)
+read_PN_CHARS_BASE(SerdReader* reader, SerdNode* dest)
{
uint32_t code;
const uint8_t c = peek_byte(reader);
@@ -436,7 +444,7 @@ is_PN_CHARS(const uint32_t c)
}
static SerdStatus
-read_PN_CHARS(SerdReader* reader, Ref dest)
+read_PN_CHARS(SerdReader* reader, SerdNode* dest)
{
uint32_t code;
const uint8_t c = peek_byte(reader);
@@ -456,7 +464,7 @@ read_PN_CHARS(SerdReader* reader, Ref dest)
}
static bool
-read_PERCENT(SerdReader* reader, Ref dest)
+read_PERCENT(SerdReader* reader, SerdNode* dest)
{
push_byte(reader, dest, eat_byte_safe(reader, '%'));
const uint8_t h1 = read_HEX(reader);
@@ -470,7 +478,7 @@ read_PERCENT(SerdReader* reader, Ref dest)
}
static SerdStatus
-read_PLX(SerdReader* reader, Ref dest)
+read_PLX(SerdReader* reader, SerdNode* dest)
{
uint8_t c = peek_byte(reader);
switch (c) {
@@ -494,7 +502,7 @@ read_PLX(SerdReader* reader, Ref dest)
}
static SerdStatus
-read_PN_LOCAL(SerdReader* reader, Ref dest, bool* ate_dot)
+read_PN_LOCAL(SerdReader* reader, SerdNode* dest, bool* ate_dot)
{
uint8_t c = peek_byte(reader);
SerdStatus st = SERD_SUCCESS;
@@ -523,10 +531,9 @@ read_PN_LOCAL(SerdReader* reader, Ref dest, bool* ate_dot)
trailing_unescaped_dot = (c == '.');
}
- SerdNode* const n = deref(reader, dest);
if (trailing_unescaped_dot) {
// Ate trailing dot, pop it from stack/node and inform caller
- --n->n_bytes;
+ --dest->n_bytes;
serd_stack_pop(&reader->stack, 1);
*ate_dot = true;
}
@@ -536,7 +543,7 @@ read_PN_LOCAL(SerdReader* reader, Ref dest, bool* ate_dot)
// Read the remainder of a PN_PREFIX after some initial characters
static SerdStatus
-read_PN_PREFIX_tail(SerdReader* reader, Ref dest)
+read_PN_PREFIX_tail(SerdReader* reader, SerdNode* dest)
{
uint8_t c;
while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')*
@@ -547,8 +554,7 @@ read_PN_PREFIX_tail(SerdReader* reader, Ref dest)
}
}
- const SerdNode* const n = deref(reader, dest);
- if (serd_node_get_string(n)[n->n_bytes - 1] == '.' &&
+ if (serd_node_get_string(dest)[dest->n_bytes - 1] == '.' &&
read_PN_CHARS(reader, dest)) {
r_err(reader, SERD_ERR_BAD_SYNTAX, "prefix ends with `.'\n");
return SERD_ERR_BAD_SYNTAX;
@@ -558,7 +564,7 @@ read_PN_PREFIX_tail(SerdReader* reader, Ref dest)
}
static SerdStatus
-read_PN_PREFIX(SerdReader* reader, Ref dest)
+read_PN_PREFIX(SerdReader* reader, SerdNode* dest)
{
if (!read_PN_CHARS_BASE(reader, dest)) {
return read_PN_PREFIX_tail(reader, dest);
@@ -566,14 +572,20 @@ read_PN_PREFIX(SerdReader* reader, Ref dest)
return SERD_FAILURE;
}
-static Ref
+static SerdNode*
read_LANGTAG(SerdReader* reader)
{
uint8_t c = peek_byte(reader);
if (!is_alpha(c)) {
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected `%c'\n", c);
+ r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected `%c'\n", c);
+ return NULL;
+ }
+
+ SerdNode* ref = push_node(reader, SERD_LITERAL, "", 0);
+ if (!ref) {
+ return NULL;
}
- Ref ref = push_node(reader, SERD_LITERAL, "", 0);
+
push_byte(reader, ref, eat_byte_safe(reader, c));
while ((c = peek_byte(reader)) && is_alpha(c)) {
push_byte(reader, ref, eat_byte_safe(reader, c));
@@ -588,7 +600,7 @@ read_LANGTAG(SerdReader* reader)
}
static bool
-read_IRIREF_scheme(SerdReader* reader, Ref dest)
+read_IRIREF_scheme(SerdReader* reader, SerdNode* dest)
{
uint8_t c = peek_byte(reader);
if (!is_alpha(c)) {
@@ -613,12 +625,12 @@ read_IRIREF_scheme(SerdReader* reader, Ref dest)
return false;
}
-static Ref
+static SerdNode*
read_IRIREF(SerdReader* reader)
{
TRY_RET(eat_byte_check(reader, '<'));
- Ref ref = push_node(reader, SERD_URI, "", 0);
- if (!fancy_syntax(reader) && !read_IRIREF_scheme(reader, ref)) {
+ SerdNode* ref = push_node(reader, SERD_URI, "", 0);
+ if (!ref || (!fancy_syntax(reader) && !read_IRIREF_scheme(reader, ref))) {
return pop_node(reader, ref);
}
@@ -633,7 +645,7 @@ read_IRIREF(SerdReader* reader)
case '>':
return ref;
case '\\':
- if (!read_UCHAR(reader, ref, &code)) {
+ if (read_UCHAR(reader, ref, &code)) {
r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n");
return pop_node(reader, ref);
}
@@ -672,7 +684,7 @@ read_IRIREF(SerdReader* reader)
}
static bool
-read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix, bool* ate_dot)
+read_PrefixedName(SerdReader* reader, SerdNode* dest, bool read_prefix, bool* ate_dot)
{
if (read_prefix && read_PN_PREFIX(reader, dest) > SERD_FAILURE) {
return false;
@@ -685,7 +697,7 @@ read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix, bool* ate_dot)
}
static bool
-read_0_9(SerdReader* reader, Ref str, bool at_least_one)
+read_0_9(SerdReader* reader, SerdNode* str, bool at_least_one)
{
unsigned count = 0;
for (uint8_t c; is_digit((c = peek_byte(reader))); ++count) {
@@ -699,20 +711,23 @@ read_0_9(SerdReader* reader, Ref str, bool at_least_one)
static bool
read_number(SerdReader* reader,
- Ref* dest,
- Ref* datatype,
+ SerdNode** dest,
+ SerdNode** datatype,
SerdNodeFlags* flags,
bool* ate_dot)
{
#define XSD_DECIMAL NS_XSD "decimal"
#define XSD_DOUBLE NS_XSD "double"
#define XSD_INTEGER NS_XSD "integer"
- Ref ref = push_node(reader, SERD_LITERAL, "", 0);
- uint8_t c = peek_byte(reader);
- bool has_decimal = false;
- if (c == '-' || c == '+') {
+ SerdNode* ref = push_node(reader, SERD_LITERAL, "", 0);
+ uint8_t c = peek_byte(reader);
+ bool has_decimal = false;
+ if (!ref) {
+ return false;
+ } else if (c == '-' || c == '+') {
push_byte(reader, ref, eat_byte_safe(reader, c));
}
+
if ((c = peek_byte(reader)) == '.') {
has_decimal = true;
// decimal case 2 (e.g. '.0' or `-.0' or `+.0')
@@ -767,23 +782,29 @@ except:
}
static bool
-read_iri(SerdReader* reader, Ref* dest, bool* ate_dot)
+read_iri(SerdReader* reader, SerdNode** dest, bool* ate_dot)
{
switch (peek_byte(reader)) {
case '<':
*dest = read_IRIREF(reader);
return true;
default:
- *dest = push_node(reader, SERD_CURIE, "", 0);
+ if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) {
+ return false;
+ }
return read_PrefixedName(reader, *dest, true, ate_dot);
}
}
static bool
-read_literal(SerdReader* reader, Ref* dest,
- Ref* datatype, Ref* lang, SerdNodeFlags* flags, bool* ate_dot)
+read_literal(SerdReader* reader,
+ SerdNode** dest,
+ SerdNode** datatype,
+ SerdNode** lang,
+ SerdNodeFlags* flags,
+ bool* ate_dot)
{
- Ref str = read_String(reader, flags);
+ SerdNode* str = read_String(reader, flags);
if (!str) {
return false;
}
@@ -823,7 +844,7 @@ is_token_end(uint8_t c)
}
static bool
-read_verb(SerdReader* reader, Ref* dest)
+read_verb(SerdReader* reader, SerdNode** dest)
{
if (peek_byte(reader) == '<') {
return (*dest = read_IRIREF(reader));
@@ -832,10 +853,13 @@ read_verb(SerdReader* reader, Ref* dest)
/* Either a qname, or "a". Read the prefix first, and if it is in fact
"a", produce that instead.
*/
- *dest = push_node(reader, SERD_CURIE, "", 0);
+ if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) {
+ return false;
+ }
+
const SerdStatus st = read_PN_PREFIX(reader, *dest);
bool ate_dot = false;
- SerdNode* node = deref(reader, *dest);
+ SerdNode* node = *dest;
if (!st && node->n_bytes == 1 &&
serd_node_get_string(node)[0] == 'a' &&
is_token_end(peek_byte(reader))) {
@@ -851,34 +875,36 @@ read_verb(SerdReader* reader, Ref* dest)
return true;
}
-static Ref
+static SerdNode*
read_BLANK_NODE_LABEL(SerdReader* reader, bool* ate_dot)
{
eat_byte_safe(reader, '_');
eat_byte_check(reader, ':');
- Ref ref = push_node(reader, SERD_BLANK,
- reader->bprefix ? reader->bprefix : "",
- reader->bprefix_len);
+ SerdNode* n = push_node(reader, SERD_BLANK,
+ reader->bprefix ? reader->bprefix : "",
+ reader->bprefix_len);
+ if (!n) {
+ return NULL;
+ }
uint8_t c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9])
if (is_digit(c) || c == '_') {
- push_byte(reader, ref, eat_byte_safe(reader, c));
- } else if (read_PN_CHARS(reader, ref)) {
+ push_byte(reader, n, eat_byte_safe(reader, c));
+ } else if (read_PN_CHARS(reader, n)) {
r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid name start character\n");
- return pop_node(reader, ref);
+ return pop_node(reader, n);
}
while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')*
if (c == '.') {
- push_byte(reader, ref, eat_byte_safe(reader, c));
- } else if (read_PN_CHARS(reader, ref)) {
+ push_byte(reader, n, eat_byte_safe(reader, c));
+ } else if (read_PN_CHARS(reader, n)) {
break;
}
}
- SerdNode* n = deref(reader, ref);
- char* buf = serd_node_buffer(n);
- if (buf[n->n_bytes - 1] == '.' && read_PN_CHARS(reader, ref)) {
+ char* buf = serd_node_buffer(n);
+ if (buf[n->n_bytes - 1] == '.' && read_PN_CHARS(reader, n)) {
// Ate trailing dot, pop it from stack/node and inform caller
--n->n_bytes;
serd_stack_pop(&reader->stack, 1);
@@ -893,30 +919,31 @@ read_BLANK_NODE_LABEL(SerdReader* reader, bool* ate_dot)
} else if (reader->seen_genid && buf[reader->bprefix_len] == 'B') {
r_err(reader, SERD_ERR_ID_CLASH,
"found both `b' and `B' blank IDs, prefix required\n");
- return pop_node(reader, ref);
+ return pop_node(reader, n);
}
}
}
- return ref;
+ return n;
}
-static Ref
+static SerdNode*
read_blankName(SerdReader* reader)
{
eat_byte_safe(reader, '=');
if (eat_byte_check(reader, '=') != '=') {
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected `='\n");
+ r_err(reader, SERD_ERR_BAD_SYNTAX, "expected `='\n");
+ return NULL;
}
- Ref subject = 0;
- bool ate_dot = false;
+ SerdNode* subject = 0;
+ bool ate_dot = false;
read_ws_star(reader);
read_iri(reader, &subject, &ate_dot);
return subject;
}
static bool
-read_anon(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest)
+read_anon(SerdReader* reader, ReadContext ctx, bool subject, SerdNode** dest)
{
const SerdStatementFlags old_flags = *ctx.flags;
bool empty;
@@ -953,7 +980,7 @@ read_anon(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest)
}
read_ws_star(reader);
if (reader->sink->end) {
- reader->sink->end(reader->sink->handle, deref(reader, *dest));
+ reader->sink->end(reader->sink->handle, *dest);
}
*ctx.flags = old_flags;
}
@@ -975,10 +1002,9 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
bool ret = false;
bool simple = (ctx->subject != 0);
- SerdNode* node = NULL;
- Ref o = 0;
- Ref datatype = 0;
- Ref lang = 0;
+ SerdNode* o = 0;
+ SerdNode* datatype = 0;
+ SerdNode* lang = 0;
uint32_t flags = 0;
const uint8_t c = peek_byte(reader);
if (!fancy_syntax(reader)) {
@@ -1017,17 +1043,16 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
/* Either a boolean literal, or a qname. Read the prefix first, and if
it is in fact a "true" or "false" literal, produce that instead.
*/
- o = push_node(reader, SERD_CURIE, "", 0);
+ TRY_THROW(o = push_node(reader, SERD_CURIE, "", 0));
while (!read_PN_CHARS_BASE(reader, o)) {}
- node = deref(reader, o);
- if ((node->n_bytes == 4 &&
- !memcmp(serd_node_get_string(node), "true", 4)) ||
- (node->n_bytes == 5 &&
- !memcmp(serd_node_get_string(node), "false", 5))) {
- flags = flags | SERD_HAS_DATATYPE;
- node->type = SERD_LITERAL;
- datatype = push_node(
- reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN);
+ if ((o->n_bytes == 4 &&
+ !memcmp(serd_node_get_string(o), "true", 4)) ||
+ (o->n_bytes == 5 &&
+ !memcmp(serd_node_get_string(o), "false", 5))) {
+ flags = flags | SERD_HAS_DATATYPE;
+ o->type = SERD_LITERAL;
+ TRY_THROW(datatype = push_node(
+ reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN));
ret = true;
} else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) {
ret = false;
@@ -1039,7 +1064,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
}
if (simple && o) {
- deref(reader, o)->flags = flags;
+ o->flags = flags;
}
if (ret && emit && simple) {
@@ -1112,7 +1137,7 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot)
}
static bool
-end_collection(SerdReader* reader, ReadContext ctx, Ref n1, Ref n2, bool ret)
+end_collection(SerdReader* reader, ReadContext ctx, SerdNode* n1, SerdNode* n2, bool ret)
{
pop_node(reader, n2);
pop_node(reader, n1);
@@ -1121,7 +1146,7 @@ end_collection(SerdReader* reader, ReadContext ctx, Ref n1, Ref n2, bool ret)
}
static bool
-read_collection(SerdReader* reader, ReadContext ctx, Ref* dest)
+read_collection(SerdReader* reader, ReadContext ctx, SerdNode** dest)
{
eat_byte_safe(reader, '(');
bool end = peek_delim(reader, ')');
@@ -1141,10 +1166,14 @@ read_collection(SerdReader* reader, ReadContext ctx, Ref* dest)
/* The order of node allocation here is necessarily not in stack order,
so we create two nodes and recycle them throughout. */
- Ref n1 = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0);
- Ref n2 = 0;
- Ref node = n1;
- Ref rest = 0;
+ SerdNode* n1 = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0);
+ SerdNode* n2 = 0;
+ SerdNode* node = n1;
+ SerdNode* rest = 0;
+
+ if (!n1) {
+ return false;
+ }
ctx.subject = *dest;
while (!(end = peek_delim(reader, ')'))) {
@@ -1179,8 +1208,8 @@ read_collection(SerdReader* reader, ReadContext ctx, Ref* dest)
return end_collection(reader, ctx, n1, n2, true);
}
-static Ref
-read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, char* s_type)
+static SerdNode*
+read_subject(SerdReader* reader, ReadContext ctx, SerdNode** dest, char* s_type)
{
bool ate_dot = false;
switch ((*s_type = peek_byte(reader))) {
@@ -1199,11 +1228,11 @@ read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, char* s_type)
return ate_dot ? pop_node(reader, *dest) : *dest;
}
-static Ref
+static SerdNode*
read_labelOrSubject(SerdReader* reader, ReadContext ctx)
{
- Ref subject = 0;
- bool ate_dot = false;
+ SerdNode* subject = 0;
+ bool ate_dot = false;
switch (peek_byte(reader)) {
case '[':
eat_byte_safe(reader, '[');
@@ -1244,11 +1273,12 @@ read_base(SerdReader* reader, bool sparql, bool token)
TRY_RET(eat_string(reader, "base", 4));
}
- Ref uri;
read_ws_star(reader);
- TRY_RET(uri = read_IRIREF(reader));
- if (reader->sink->base) {
- reader->sink->base(reader->sink->handle, deref(reader, uri));
+ SerdNode* uri = read_IRIREF(reader);
+ if (!uri) {
+ return false;
+ } else if (reader->sink->base) {
+ reader->sink->base(reader->sink->handle, uri);
}
pop_node(reader, uri);
@@ -1270,9 +1300,11 @@ read_prefixID(SerdReader* reader, bool sparql, bool token)
}
read_ws_star(reader);
- bool ret = true;
- Ref name = push_node(reader, SERD_LITERAL, "", 0);
- if (read_PN_PREFIX(reader, name) > SERD_FAILURE) {
+ bool ret = true;
+ SerdNode* name = push_node(reader, SERD_LITERAL, "", 0);
+ if (!name) {
+ return false;
+ } else if (read_PN_PREFIX(reader, name) > SERD_FAILURE) {
return pop_node(reader, name);
}
@@ -1281,16 +1313,14 @@ read_prefixID(SerdReader* reader, bool sparql, bool token)
}
read_ws_star(reader);
- const Ref uri = read_IRIREF(reader);
+ const SerdNode* uri = read_IRIREF(reader);
if (!uri) {
pop_node(reader, name);
return false;
}
if (reader->sink->prefix) {
- ret = !reader->sink->prefix(reader->sink->handle,
- deref(reader, name),
- deref(reader, uri));
+ ret = !reader->sink->prefix(reader->sink->handle, name, uri);
}
pop_node(reader, uri);
pop_node(reader, name);
@@ -1333,7 +1363,7 @@ read_wrappedGraph(SerdReader* reader, ReadContext* ctx)
read_ws_star(reader);
while (peek_byte(reader) != '}') {
ctx->subject = 0;
- Ref subj = read_subject(reader, *ctx, &ctx->subject, &s_type);
+ SerdNode* subj = read_subject(reader, *ctx, &ctx->subject, &s_type);
if (!subj && ctx->subject) {
return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad subject\n");
} else if (!subj) {
@@ -1353,9 +1383,8 @@ read_wrappedGraph(SerdReader* reader, ReadContext* ctx)
}
static int
-tokcmp(SerdReader* reader, Ref ref, const char* tok, size_t n)
+tokcmp(SerdReader* reader, SerdNode* node, const char* tok, size_t n)
{
- SerdNode* node = deref(reader, ref);
if (!node || node->n_bytes != n) {
return -1;
}
@@ -1367,7 +1396,7 @@ read_n3_statement(SerdReader* reader)
{
SerdStatementFlags flags = 0;
ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags };
- Ref subj = 0;
+ SerdNode* subj = 0;
bool ate_dot = false;
char s_type = 0;
bool ret = true;
diff --git a/src/reader.c b/src/reader.c
index 1233bb19..31ee5287 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -43,9 +43,8 @@ r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...)
}
void
-set_blank_id(SerdReader* reader, Ref ref, size_t buf_size)
+set_blank_id(SerdReader* reader, SerdNode* node, size_t buf_size)
{
- SerdNode* node = deref(reader, ref);
char* buf = (char*)(node + 1);
const char* prefix = reader->bprefix ? (const char*)reader->bprefix : "";
node->n_bytes = snprintf(buf, buf_size, "%sb%u", prefix, reader->next_id++);
@@ -57,21 +56,29 @@ genid_size(SerdReader* reader)
return reader->bprefix_len + 1 + 10 + 1; // + "b" + UINT32_MAX + \0
}
-Ref
+SerdNode*
blank_id(SerdReader* reader)
{
- Ref ref = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0);
- set_blank_id(reader, ref, genid_size(reader));
+ SerdNode* ref = push_node_padded(
+ reader, genid_size(reader), SERD_BLANK, "", 0);
+ if (ref) {
+ set_blank_id(reader, ref, genid_size(reader));
+ }
return ref;
}
-Ref
+SerdNode*
push_node_padded(SerdReader* reader, size_t maxlen,
SerdType type, const char* str, size_t n_bytes)
{
void* mem = serd_stack_push_aligned(
&reader->stack, sizeof(SerdNode) + maxlen + 1, sizeof(SerdNode));
+ if (!mem) {
+ reader->status = SERD_ERR_OVERFLOW;
+ return NULL;
+ }
+
SerdNode* const node = (SerdNode*)mem;
node->n_bytes = n_bytes;
node->flags = 0;
@@ -85,49 +92,45 @@ push_node_padded(SerdReader* reader, size_t maxlen,
reader->allocs, sizeof(reader->allocs) * (++reader->n_allocs));
reader->allocs[reader->n_allocs - 1] = (mem - reader->stack.buf);
#endif
- return (char*)node - reader->stack.buf;
+ return node;
}
-Ref
+SerdNode*
push_node(SerdReader* reader, SerdType type, const char* str, size_t n_bytes)
{
return push_node_padded(reader, n_bytes, type, str, n_bytes);
}
SerdNode*
-deref(SerdReader* reader, const Ref ref)
+pop_node(SerdReader* reader, const SerdNode* node)
{
- return ref ? (SerdNode*)(reader->stack.buf + ref) : NULL;
-}
-
-Ref
-pop_node(SerdReader* reader, Ref ref)
-{
- if (ref && ref != reader->rdf_first && ref != reader->rdf_rest
- && ref != reader->rdf_nil) {
+ if (node && node != reader->rdf_first && node != reader->rdf_rest
+ && node != reader->rdf_nil) {
#ifdef SERD_STACK_CHECK
- SERD_STACK_ASSERT_TOP(reader, ref);
+ SERD_STACK_ASSERT_TOP(reader, node);
--reader->n_allocs;
#endif
- SerdNode* const node = deref(reader, ref);
- char* const top = reader->stack.buf + reader->stack.size;
+ char* const top = reader->stack.buf + reader->stack.size;
serd_stack_pop_aligned(&reader->stack, top - (char*)node);
}
- return 0;
+ return NULL;
}
bool
-emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l)
+emit_statement(SerdReader* reader,
+ ReadContext ctx,
+ SerdNode* o,
+ SerdNode* d,
+ SerdNode* l)
{
- SerdNode* graph = deref(reader, ctx.graph);
+ SerdNode* graph = ctx.graph;
if (!graph && reader->default_graph) {
graph = reader->default_graph;
}
bool ret = !reader->sink->statement ||
!reader->sink->statement(
reader->sink->handle, *ctx.flags, graph,
- deref(reader, ctx.subject), deref(reader, ctx.predicate),
- deref(reader, o));
+ ctx.subject, ctx.predicate, o);
*ctx.flags &= SERD_ANON_CONT|SERD_LIST_CONT; // Preserve only cont flags
return ret;
}
@@ -157,15 +160,16 @@ serd_reader_read_document(SerdReader* reader)
}
SerdReader*
-serd_reader_new(SerdWorld* world,
+serd_reader_new(SerdWorld* world,
SerdSyntax syntax,
- const SerdSinkInterface* sink)
+ const SerdSinkInterface* sink,
+ size_t stack_size)
{
SerdReader* me = (SerdReader*)calloc(1, sizeof(SerdReader));
me->world = world;
me->sink = sink;
me->default_graph = NULL;
- me->stack = serd_stack_new(SERD_PAGE_SIZE);
+ me->stack = serd_stack_new(stack_size);
me->syntax = syntax;
me->next_id = 1;
me->strict = true;
diff --git a/src/reader.h b/src/reader.h
index abae5d92..92baac3f 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -35,18 +35,13 @@
# define SERD_STACK_ASSERT_TOP(reader, ref)
#endif
-/* Reference to a node in the stack (we can not use pointers since the
- stack may be reallocated, invalidating any pointers to elements).
-*/
-typedef size_t Ref;
-
typedef struct {
- Ref graph;
- Ref subject;
- Ref predicate;
- Ref object;
- Ref datatype;
- Ref lang;
+ SerdNode* graph;
+ SerdNode* subject;
+ SerdNode* predicate;
+ SerdNode* object;
+ SerdNode* datatype;
+ SerdNode* lang;
SerdStatementFlags* flags;
} ReadContext;
@@ -55,9 +50,9 @@ struct SerdReaderImpl {
const SerdSinkInterface* sink;
SerdErrorSink error_sink;
void* error_handle;
- Ref rdf_first;
- Ref rdf_rest;
- Ref rdf_nil;
+ SerdNode* rdf_first;
+ SerdNode* rdf_rest;
+ SerdNode* rdf_nil;
SerdNode* default_graph;
SerdByteSource source;
SerdStack stack;
@@ -70,33 +65,35 @@ struct SerdReaderImpl {
bool strict; ///< True iff strict parsing
bool seen_genid;
#ifdef SERD_STACK_CHECK
- Ref* allocs; ///< Stack of push offsets
+ SerdNode** allocs; ///< Stack of push offsets
size_t n_allocs; ///< Number of stack pushes
#endif
};
int r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...);
-Ref push_node_padded(SerdReader* reader,
- size_t maxlen,
- SerdType type,
- const char* str,
- size_t n_bytes);
-
-Ref push_node(SerdReader* reader,
- SerdType type,
- const char* str,
- size_t n_bytes);
+SerdNode* push_node_padded(SerdReader* reader,
+ size_t maxlen,
+ SerdType type,
+ const char* str,
+ size_t n_bytes);
-size_t genid_size(SerdReader* reader);
-Ref blank_id(SerdReader* reader);
-void set_blank_id(SerdReader* reader, Ref ref, size_t buf_size);
+SerdNode* push_node(SerdReader* reader,
+ SerdType type,
+ const char* str,
+ size_t n_bytes);
-SerdNode* deref(SerdReader* reader, Ref ref);
+size_t genid_size(SerdReader* reader);
+SerdNode* blank_id(SerdReader* reader);
+void set_blank_id(SerdReader* reader, SerdNode* node, size_t buf_size);
-Ref pop_node(SerdReader* reader, Ref ref);
+SerdNode* pop_node(SerdReader* reader, const SerdNode* node);
-bool emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l);
+bool emit_statement(SerdReader* reader,
+ ReadContext ctx,
+ SerdNode* o,
+ SerdNode* d,
+ SerdNode* l);
bool read_n3_statement(SerdReader* reader);
SerdStatus read_nquadsDoc(SerdReader* reader);
@@ -149,23 +146,31 @@ eat_string(SerdReader* reader, const char* str, unsigned n)
}
static inline SerdStatus
-push_byte(SerdReader* reader, Ref ref, const uint8_t c)
+push_byte(SerdReader* reader, SerdNode* node, const uint8_t c)
{
SERD_STACK_ASSERT_TOP(reader, ref);
- char* const s = (char*)serd_stack_push(&reader->stack, 1);
- SerdNode* const node = (SerdNode*)(reader->stack.buf + ref);
+ char* const s = (char*)serd_stack_push(&reader->stack, 1);
+ if (!s) {
+ return SERD_ERR_OVERFLOW;
+ }
+
++node->n_bytes;
*(s - 1) = c;
*s = '\0';
return SERD_SUCCESS;
}
-static inline void
-push_bytes(SerdReader* reader, Ref ref, const uint8_t* bytes, unsigned len)
+static inline SerdStatus
+push_bytes(SerdReader* reader, SerdNode* ref, const uint8_t* bytes, unsigned len)
{
+ if (reader->stack.buf_size < reader->stack.size + len) {
+ return SERD_ERR_OVERFLOW;
+ }
+
for (unsigned i = 0; i < len; ++i) {
push_byte(reader, ref, bytes[i]);
}
+ return SERD_SUCCESS;
}
#endif // SERD_READER_H
diff --git a/src/serdi.c b/src/serdi.c
index 1f12812b..20690c37 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -20,9 +20,11 @@
#include "serd/serd.h"
+#include <limits.h>
#include <stdbool.h>
+#include <stdint.h>
#include <stdio.h>
-#include <string.h>
+#include <stdlib.h>
#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg);
#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__);
@@ -135,6 +137,7 @@ main(int argc, char** argv)
bool lax = false;
bool use_model = false;
bool quiet = false;
+ long stack_size = 4194304;
const char* add_prefix = NULL;
const char* chop_prefix = NULL;
const char* root_uri = NULL;
@@ -171,6 +174,15 @@ main(int argc, char** argv)
} else if (!(input_syntax = get_syntax(argv[a]))) {
return print_usage(argv[0], true);
}
+ } else if (argv[a][1] == 'k') {
+ if (++a == argc) {
+ return missing_arg(argv[0], 'k');
+ }
+ stack_size = strtol(argv[a], NULL, 10);
+ if (stack_size <= 0 || stack_size == LONG_MAX) {
+ SERDI_ERRORF("stack size `%ld' out of range\n", stack_size);
+ return 1;
+ }
} else if (argv[a][1] == 'o') {
if (++a == argc) {
return missing_arg(argv[0], 'o');
@@ -263,7 +275,7 @@ main(int argc, char** argv)
sink = serd_writer_get_sink_interface(writer);
}
- reader = serd_reader_new(world, input_syntax, sink);
+ reader = serd_reader_new(world, input_syntax, sink, stack_size);
serd_reader_set_strict(reader, !lax);
if (quiet) {
serd_world_set_error_sink(world, quiet_error_sink, NULL);
diff --git a/src/stack.h b/src/stack.h
index 122b3b14..3848af66 100644
--- a/src/stack.h
+++ b/src/stack.h
@@ -61,8 +61,7 @@ serd_stack_push(SerdStack* stack, size_t n_bytes)
{
const size_t new_size = stack->size + n_bytes;
if (stack->buf_size < new_size) {
- stack->buf_size += (stack->buf_size >> 1); // *= 1.5
- stack->buf = (char*)realloc(stack->buf, stack->buf_size);
+ return NULL;
}
char* const ret = (stack->buf + stack->size);
stack->size = new_size;
@@ -80,12 +79,16 @@ static inline void*
serd_stack_push_aligned(SerdStack* stack, size_t n_bytes, size_t align)
{
// Push one byte to ensure space for a pad count
- serd_stack_push(stack, 1);
+ if (!serd_stack_push(stack, 1)) {
+ return NULL;
+ }
// Push padding if necessary
const uint8_t pad = align - stack->size % align;
if (pad > 0) {
- serd_stack_push(stack, pad);
+ if (!serd_stack_push(stack, pad)) {
+ return NULL;
+ }
}
// Set top of stack to pad count so we can properly pop later
diff --git a/src/string.c b/src/string.c
index 3ca5bd98..b32df6e3 100644
--- a/src/string.c
+++ b/src/string.c
@@ -36,6 +36,7 @@ serd_strerror(SerdStatus status)
case SERD_ERR_ID_CLASH: return "Blank node ID clash";
case SERD_ERR_BAD_CURIE: return "Invalid CURIE";
case SERD_ERR_INTERNAL: return "Internal error";
+ case SERD_ERR_OVERFLOW: return "Stack overflow";
}
return "Unknown error"; // never reached
}
diff --git a/src/writer.c b/src/writer.c
index 2f17d86d..f4f60413 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -877,7 +877,7 @@ serd_writer_new(SerdWorld* world,
writer->env = env;
writer->root_node = NULL;
writer->root_uri = SERD_URI_NULL;
- writer->anon_stack = serd_stack_new(4 * sizeof(WriteContext));
+ writer->anon_stack = serd_stack_new(SERD_PAGE_SIZE);
writer->context = context;
writer->list_subj = NULL;
writer->empty = true;
diff --git a/tests/read_chunk_test.c b/tests/read_chunk_test.c
index 1e41ca8a..82cb4277 100644
--- a/tests/read_chunk_test.c
+++ b/tests/read_chunk_test.c
@@ -61,7 +61,7 @@ main(int argc, char** argv)
{
SerdWorld* world = serd_world_new();
SerdSinkInterface sink = { 0, on_base, on_prefix, on_statement, on_end };
- SerdReader* reader = serd_reader_new(world, SERD_TURTLE, &sink);
+ SerdReader* reader = serd_reader_new(world, SERD_TURTLE, &sink, 4096);
if (!reader) {
FAIL("Failed to create reader\n");
}
diff --git a/tests/serd_test.c b/tests/serd_test.c
index e63e9594..5bb7db12 100644
--- a/tests/serd_test.c
+++ b/tests/serd_test.c
@@ -675,7 +675,7 @@ main(void)
ReaderTest rt = { 0, NULL };
SerdSinkInterface sink = { &rt, NULL, NULL, test_sink, NULL };
- SerdReader* reader = serd_reader_new(world, SERD_TURTLE, &sink);
+ SerdReader* reader = serd_reader_new(world, SERD_TURTLE, &sink, 4096);
if (!reader) {
FAIL("Failed to create reader\n");
}