about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: David Robillard <d@drobilla.net> 2020-08-15 20:11:19 +0200
committer: David Robillard <d@drobilla.net> 2022-01-13 23:03:37 -0500
commit: a10fddf0f697e78325ddcfbc71af8f154ffd2a82 (patch)
tree: 75c2dd20aef99bff82dc81c93ab29b81313a9c8e
parent: 0839a0fda214250c41c07c799c700f4432e54963 (diff)
download: serd-a10fddf0f697e78325ddcfbc71af8f154ffd2a82.tar.gz
serd-a10fddf0f697e78325ddcfbc71af8f154ffd2a82.tar.bz2
serd-a10fddf0f697e78325ddcfbc71af8f154ffd2a82.zip
Improve reader error handling
-rw-r--r-- src/n3.c 294
-rw-r--r-- src/reader.c 4
-rw-r--r-- src/reader.h 19
-rw-r--r-- test/meson.build 1
-rw-r--r-- test/test_overflow.c 163
5 files changed, 350 insertions, 131 deletions
diff --git a/src/n3.c b/src/n3.c
index f7b4f8b5..1883b6b2 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -231,8 +231,8 @@ read_utf8_character(SerdReader* const reader,
uint8_t bytes[4] = {0, 0, 0, 0};
SerdStatus st = read_utf8_bytes(reader, bytes, &size, c);
if (st) {
- push_bytes(reader, dest, replacement_char, 3);
- return st;
+ const SerdStatus rst = push_bytes(reader, dest, replacement_char, 3);
+ return rst ? rst : st;
}
return push_bytes(reader, dest, bytes, size);
@@ -248,8 +248,8 @@ read_utf8_code(SerdReader* const reader,
uint8_t bytes[4] = {0, 0, 0, 0};
SerdStatus st = read_utf8_bytes(reader, bytes, &size, c);
if (st) {
- push_bytes(reader, dest, replacement_char, 3);
- return st;
+ const SerdStatus rst = push_bytes(reader, dest, replacement_char, 3);
+ return rst ? rst : st;
}
if (!(st = push_bytes(reader, dest, bytes, size))) {
@@ -368,8 +368,9 @@ read_STRING_LITERAL_LONG(SerdReader* const reader,
break;
}
ref->flags |= SERD_HAS_QUOTE;
- push_byte(reader, ref, c);
- st = read_character(reader, ref, (uint8_t)q2);
+ if (!(st = push_byte(reader, ref, c))) {
+ st = read_character(reader, ref, (uint8_t)q2);
+ }
} else if (c == EOF) {
return r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in long string\n");
} else {
@@ -408,17 +409,14 @@ read_STRING_LITERAL(SerdReader* const reader,
break;
default:
if (c == q) {
- eat_byte_check(reader, q);
- return SERD_SUCCESS;
+ return eat_byte_check(reader, q);
} else {
st = read_character(reader, ref, (uint8_t)eat_byte_safe(reader, c));
}
}
}
- return st ? st
- : eat_byte_check(reader, q) ? SERD_SUCCESS
- : SERD_ERR_BAD_SYNTAX;
+ return st ? st : eat_byte_check(reader, q);
}
static SerdStatus
@@ -473,7 +471,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, SerdNode* const dest)
const int c = peek_byte(reader);
SerdStatus st = SERD_SUCCESS;
if (is_alpha(c)) {
- push_byte(reader, dest, eat_byte_safe(reader, c));
+ st = push_byte(reader, dest, eat_byte_safe(reader, c));
} else if (c == EOF || !(c & 0x80)) {
return SERD_FAILURE;
} else if ((st = read_utf8_code(
@@ -503,7 +501,7 @@ read_PN_CHARS(SerdReader* const reader, SerdNode* const dest)
const int c = peek_byte(reader);
SerdStatus st = SERD_SUCCESS;
if (is_alpha(c) || is_digit(c) || c == '_' || c == '-') {
- push_byte(reader, dest, eat_byte_safe(reader, c));
+ st = push_byte(reader, dest, eat_byte_safe(reader, c));
} else if (c == EOF || !(c & 0x80)) {
return SERD_FAILURE;
} else if ((st = read_utf8_code(
@@ -519,15 +517,22 @@ read_PN_CHARS(SerdReader* const reader, SerdNode* const dest)
static SerdStatus
read_PERCENT(SerdReader* const reader, SerdNode* const dest)
{
- push_byte(reader, dest, eat_byte_safe(reader, '%'));
+ SerdStatus st = push_byte(reader, dest, eat_byte_safe(reader, '%'));
+ if (st) {
+ return st;
+ }
+
const uint8_t h1 = read_HEX(reader);
const uint8_t h2 = read_HEX(reader);
- if (h1 && h2) {
- push_byte(reader, dest, h1);
- return push_byte(reader, dest, h2);
+ if (!h1 || !h2) {
+ return SERD_ERR_BAD_SYNTAX;
+ }
+
+ if (!(st = push_byte(reader, dest, h1))) {
+ st = push_byte(reader, dest, h2);
}
- return SERD_ERR_BAD_SYNTAX;
+ return st;
}
static SerdStatus
@@ -557,13 +562,12 @@ read_PN_LOCAL_ESC(SerdReader* const reader, SerdNode* const dest)
case '@':
case '_':
case '~':
- push_byte(reader, dest, eat_byte_safe(reader, c));
- break;
+ return push_byte(reader, dest, eat_byte_safe(reader, c));
default:
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n");
+ break;
}
- return SERD_SUCCESS;
+ return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n");
}
static SerdStatus
@@ -601,7 +605,7 @@ read_PN_LOCAL(SerdReader* const reader,
case '9':
case ':':
case '_':
- push_byte(reader, dest, eat_byte_safe(reader, c));
+ st = push_byte(reader, dest, eat_byte_safe(reader, c));
break;
default:
if ((st = read_PLX(reader, dest)) > SERD_FAILURE) {
@@ -613,9 +617,9 @@ read_PN_LOCAL(SerdReader* const reader,
while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.' | ':')*
if (c == '.' || c == ':') {
- push_byte(reader, dest, eat_byte_safe(reader, c));
+ st = push_byte(reader, dest, eat_byte_safe(reader, c));
} else if ((st = read_PLX(reader, dest)) > SERD_FAILURE) {
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad escape\n");
+ return r_err(reader, st, "bad escape\n");
} else if (st != SERD_SUCCESS && (st = read_PN_CHARS(reader, dest))) {
break;
}
@@ -647,22 +651,27 @@ read_PN_PREFIX_tail(SerdReader* const reader, SerdNode* const dest)
}
if (st <= SERD_FAILURE &&
- serd_node_string(dest)[serd_node_length(dest) - 1] == '.' &&
- read_PN_CHARS(reader, dest)) {
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "prefix ends with `.'\n");
+ serd_node_string(dest)[serd_node_length(dest) - 1] == '.') {
+ if ((st = read_PN_CHARS(reader, dest))) {
+ return r_err(reader,
+ st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX,
+ "prefix ends with `.'\n");
+ }
}
- return st > SERD_FAILURE ? st : SERD_SUCCESS;
+ return st;
}
static SerdStatus
read_PN_PREFIX(SerdReader* const reader, SerdNode* const dest)
{
- if (!read_PN_CHARS_BASE(reader, dest)) {
+ SerdStatus st = SERD_SUCCESS;
+
+ if (!(st = read_PN_CHARS_BASE(reader, dest))) {
return read_PN_PREFIX_tail(reader, dest);
}
- return SERD_FAILURE;
+ return st;
}
static SerdStatus
@@ -700,6 +709,7 @@ read_IRIREF_scheme(SerdReader* const reader, SerdNode* const dest)
return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad IRI scheme start `%c'\n", c);
}
+ SerdStatus st = SERD_SUCCESS;
while ((c = peek_byte(reader)) != EOF) {
if (c == '>') {
return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing IRI scheme\n");
@@ -713,30 +723,35 @@ read_IRIREF_scheme(SerdReader* const reader, SerdNode* const dest)
(char)c);
}
- push_byte(reader, dest, eat_byte_safe(reader, c));
+ if ((st = push_byte(reader, dest, eat_byte_safe(reader, c)))) {
+ return st;
+ }
+
if (c == ':') {
return SERD_SUCCESS; // End of scheme
}
}
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n");
+ return SERD_FAILURE;
}
static SerdStatus
read_IRIREF(SerdReader* const reader, SerdNode** const dest)
{
- if (!eat_byte_check(reader, '<')) {
- return SERD_ERR_BAD_SYNTAX;
+ SerdStatus st = SERD_SUCCESS;
+ if ((st = eat_byte_check(reader, '<'))) {
+ return st;
}
- *dest = push_node(reader, SERD_URI, "", 0);
+ if (!(*dest = push_node(reader, SERD_URI, "", 0))) {
+ return SERD_ERR_OVERFLOW;
+ }
- if (!fancy_syntax(reader) && read_IRIREF_scheme(reader, *dest)) {
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected IRI scheme\n");
+ if (!fancy_syntax(reader) && (st = read_IRIREF_scheme(reader, *dest))) {
+ return r_err(reader, st, "expected IRI scheme\n");
}
- SerdStatus st = SERD_SUCCESS;
- uint32_t code = 0;
+ uint32_t code = 0;
while (st <= SERD_FAILURE) {
const int c = eat_byte_safe(reader, peek_byte(reader));
switch (c) {
@@ -780,10 +795,11 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest)
break;
}
- st = SERD_FAILURE;
- push_byte(reader, *dest, c);
+ if (!(st = push_byte(reader, *dest, c))) {
+ st = SERD_FAILURE;
+ }
} else if (!(c & 0x80)) {
- push_byte(reader, *dest, c);
+ st = push_byte(reader, *dest, c);
} else if (read_utf8_character(reader, *dest, (uint8_t)c)) {
if (reader->strict) {
return SERD_ERR_BAD_SYNTAX;
@@ -810,11 +826,12 @@ read_PrefixedName(SerdReader* const reader,
return SERD_FAILURE;
}
- push_byte(reader, dest, eat_byte_safe(reader, ':'));
-
- st = read_PN_LOCAL(reader, dest, ate_dot);
+ if ((st = push_byte(reader, dest, eat_byte_safe(reader, ':'))) ||
+ (st = read_PN_LOCAL(reader, dest, ate_dot)) > SERD_FAILURE) {
+ return st;
+ }
- return (st > SERD_FAILURE) ? st : SERD_SUCCESS;
+ return SERD_SUCCESS;
}
static SerdStatus
@@ -830,7 +847,7 @@ read_0_9(SerdReader* const reader, SerdNode* const str, const bool at_least_one)
return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected digit\n");
}
- return SERD_SUCCESS;
+ return st;
}
static SerdStatus
@@ -852,13 +869,13 @@ read_number(SerdReader* const reader,
}
if (c == '-' || c == '+') {
- push_byte(reader, *dest, eat_byte_safe(reader, c));
+ TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c)));
}
if ((c = peek_byte(reader)) == '.') {
has_decimal = true;
// decimal case 2 (e.g. '.0' or `-.0' or `+.0')
- push_byte(reader, *dest, eat_byte_safe(reader, c));
+ TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c)));
TRY(st, read_0_9(reader, *dest, true));
} else {
// all other cases ::= ( '-' | '+' ) [0-9]+ ( . )? ( [0-9]+ )? ...
@@ -874,18 +891,18 @@ read_number(SerdReader* const reader,
return SERD_SUCCESS; // Next byte is not a number character
}
- push_byte(reader, *dest, '.');
+ TRY(st, push_byte(reader, *dest, '.'));
read_0_9(reader, *dest, false);
}
}
c = peek_byte(reader);
if (c == 'e' || c == 'E') {
// double
- push_byte(reader, *dest, eat_byte_safe(reader, c));
+ TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c)));
switch ((c = peek_byte(reader))) {
case '+':
case '-':
- push_byte(reader, *dest, eat_byte_safe(reader, c));
+ TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c)));
break;
default:
break;
@@ -911,10 +928,9 @@ read_iri(SerdReader* const reader, SerdNode** const dest, bool* const ate_dot)
case '<':
return read_IRIREF(reader, dest);
default:
- if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) {
- return SERD_ERR_OVERFLOW;
- }
- return read_PrefixedName(reader, *dest, true, ate_dot);
+ *dest = push_node(reader, SERD_CURIE, "", 0);
+ return *dest ? read_PrefixedName(reader, *dest, true, ate_dot)
+ : SERD_ERR_OVERFLOW;
}
}
@@ -923,11 +939,12 @@ read_literal(SerdReader* const reader,
SerdNode** const dest,
bool* const ate_dot)
{
- *dest = push_node(reader, SERD_LITERAL, "", 0);
+ if (!(*dest = push_node(reader, SERD_LITERAL, "", 0))) {
+ return SERD_ERR_OVERFLOW;
+ }
SerdStatus st = read_String(reader, *dest);
if (st) {
- *dest = NULL;
return st;
}
@@ -936,17 +953,13 @@ read_literal(SerdReader* const reader,
case '@':
eat_byte_safe(reader, '@');
(*dest)->flags |= SERD_HAS_LANGUAGE;
- if ((st = read_LANGTAG(reader))) {
- return r_err(reader, st, "bad literal\n");
- }
+ TRY(st, read_LANGTAG(reader));
break;
case '^':
eat_byte_safe(reader, '^');
- eat_byte_check(reader, '^');
+ TRY(st, eat_byte_check(reader, '^'));
(*dest)->flags |= SERD_HAS_DATATYPE;
- if ((st = read_iri(reader, &datatype, ate_dot))) {
- return r_err(reader, st, "bad literal\n");
- }
+ TRY(st, read_iri(reader, &datatype, ate_dot));
break;
}
return SERD_SUCCESS;
@@ -967,21 +980,26 @@ read_verb(SerdReader* const reader, SerdNode** const dest)
return SERD_ERR_OVERFLOW;
}
- SerdStatus st = read_PN_PREFIX(reader, *dest);
- bool ate_dot = false;
- SerdNode* node = *dest;
- const int next = peek_byte(reader);
- if (!st && node->length == 1 && serd_node_string(node)[0] == 'a' &&
- next != ':' && !is_PN_CHARS_BASE((uint32_t)next)) {
+ SerdStatus st = read_PN_PREFIX(reader, *dest);
+ if (st > SERD_FAILURE) {
+ return st;
+ }
+
+ bool ate_dot = false;
+ SerdNode* node = *dest;
+ const int next = peek_byte(reader);
+ if (node->length == 1 && serd_node_string(node)[0] == 'a' && next != ':' &&
+ !is_PN_CHARS_BASE((uint32_t)next)) {
serd_stack_pop_to(&reader->stack, orig_stack_size);
- *dest = push_node(reader, SERD_URI, NS_RDF "type", 47);
- return SERD_SUCCESS;
+ return ((*dest = push_node(reader, SERD_URI, NS_RDF "type", 47))
+ ? SERD_SUCCESS
+ : SERD_ERR_OVERFLOW);
}
- if (st > SERD_FAILURE || read_PrefixedName(reader, *dest, false, &ate_dot) ||
- ate_dot) {
+ if ((st = read_PrefixedName(reader, *dest, false, &ate_dot)) || ate_dot) {
*dest = NULL;
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad verb\n");
+ return r_err(
+ reader, st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX, "expected verb\n");
}
return SERD_SUCCESS;
@@ -992,19 +1010,22 @@ read_BLANK_NODE_LABEL(SerdReader* const reader,
SerdNode** const dest,
bool* const ate_dot)
{
- eat_byte_safe(reader, '_');
- eat_byte_check(reader, ':');
-
SerdStatus st = SERD_SUCCESS;
- SerdNode* n = *dest = push_node(reader,
- SERD_BLANK,
- reader->bprefix ? reader->bprefix : "",
- reader->bprefix_len);
+ eat_byte_safe(reader, '_');
+ TRY(st, eat_byte_check(reader, ':'));
- int c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9])
+ if (!(*dest = push_node(reader,
+ SERD_BLANK,
+ reader->bprefix ? reader->bprefix : "",
+ reader->bprefix_len))) {
+ return SERD_ERR_OVERFLOW;
+ }
+
+ SerdNode* n = *dest;
+ int c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9])
if (is_digit(c) || c == '_') {
- push_byte(reader, n, eat_byte_safe(reader, c));
+ TRY(st, push_byte(reader, n, eat_byte_safe(reader, c)));
} else if ((st = read_PN_CHARS(reader, n))) {
return r_err(reader, st, "invalid name start\n");
}
@@ -1061,7 +1082,9 @@ read_anon(SerdReader* const reader,
}
if (!*dest) {
- *dest = blank_id(reader);
+ if (!(*dest = blank_id(reader))) {
+ return SERD_ERR_OVERFLOW;
+ }
}
SerdStatus st = SERD_SUCCESS;
@@ -1075,8 +1098,12 @@ read_anon(SerdReader* const reader,
if (!subject) {
*ctx.flags |= SERD_ANON_CONT;
}
+
bool ate_dot_in_list = false;
- read_predicateObjectList(reader, ctx, &ate_dot_in_list);
+ if ((st = read_predicateObjectList(reader, ctx, &ate_dot_in_list))) {
+ return st;
+ }
+
if (ate_dot_in_list) {
return r_err(reader, SERD_ERR_BAD_SYNTAX, "`.' inside blank\n");
}
@@ -1084,8 +1111,8 @@ read_anon(SerdReader* const reader,
serd_sink_write_end(reader->sink, *dest);
*ctx.flags = old_flags;
}
- return (eat_byte_check(reader, ']') == ']') ? SERD_SUCCESS
- : SERD_ERR_BAD_SYNTAX;
+
+ return eat_byte_check(reader, ']');
}
/* If emit is true: recurses, calling statement_sink for every statement
@@ -1117,6 +1144,7 @@ read_object(SerdReader* const reader,
return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected: ':', '<', or '_'\n");
}
}
+
switch (c) {
case EOF:
case ')':
@@ -1133,6 +1161,8 @@ read_object(SerdReader* const reader,
ret = read_BLANK_NODE_LABEL(reader, &o, ate_dot);
break;
case '<':
+ ret = read_IRIREF(reader, &o);
+ break;
case ':':
ret = read_iri(reader, &o, ate_dot);
break;
@@ -1163,21 +1193,26 @@ read_object(SerdReader* const reader,
return SERD_ERR_OVERFLOW;
}
- while (!read_PN_CHARS_BASE(reader, o)) {
+ while (!(ret = read_PN_CHARS_BASE(reader, o))) {
+ }
+
+ if (ret > SERD_FAILURE) {
+ return ret;
}
+
if ((o->length == 4 && !memcmp(serd_node_string(o), "true", 4)) ||
(o->length == 5 && !memcmp(serd_node_string(o), "false", 5))) {
o->flags |= SERD_HAS_DATATYPE;
o->type = SERD_LITERAL;
- push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN);
- ret = SERD_SUCCESS;
- } else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) {
- ret = SERD_ERR_BAD_SYNTAX;
- } else {
- if ((ret = read_PrefixedName(reader, o, false, ate_dot))) {
- ret = ret > SERD_FAILURE ? ret : SERD_ERR_BAD_SYNTAX;
- return r_err(reader, ret, "expected prefixed name\n");
+ if (!(push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN))) {
+ ret = SERD_ERR_OVERFLOW;
+ } else {
+ ret = SERD_SUCCESS;
}
+ } else if ((ret = read_PN_PREFIX_tail(reader, o)) > SERD_FAILURE ||
+ (ret = read_PrefixedName(reader, o, false, ate_dot))) {
+ ret = (ret > SERD_FAILURE) ? ret : SERD_ERR_BAD_SYNTAX;
+ return r_err(reader, ret, "expected prefixed name\n");
}
}
@@ -1261,11 +1296,8 @@ static SerdStatus
end_collection(SerdReader* const reader, ReadContext ctx, const SerdStatus st)
{
*ctx.flags &= ~(unsigned)SERD_LIST_CONT;
- if (!st) {
- return (eat_byte_check(reader, ')') == ')') ? SERD_SUCCESS
- : SERD_ERR_BAD_SYNTAX;
- }
- return st;
+
+ return st ? st : eat_byte_check(reader, ')');
}
static SerdStatus
@@ -1277,6 +1309,11 @@ read_collection(SerdReader* const reader,
eat_byte_safe(reader, '(');
bool end = peek_delim(reader, ')');
*dest = end ? reader->rdf_nil : blank_id(reader);
+
+ if (!*dest) {
+ return SERD_ERR_OVERFLOW;
+ }
+
if (ctx.subject) {
// subject predicate _:head
*ctx.flags |= (end ? 0 : SERD_LIST_O_BEGIN);
@@ -1315,6 +1352,7 @@ read_collection(SerdReader* const reader,
used and > IDs generated by read_object above. */
if (!rest) {
rest = blank_id(reader); // First pass, push
+ assert(rest); // Can't overflow since read_object() popped
} else {
set_blank_id(reader, rest, genid_size(reader));
}
@@ -1343,7 +1381,7 @@ read_subject(SerdReader* const reader,
bool ate_dot = false;
switch ((*s_type = peek_byte(reader))) {
case '[':
- read_anon(reader, ctx, true, dest);
+ st = read_anon(reader, ctx, true, dest);
break;
case '(':
st = read_collection(reader, ctx, dest);
@@ -1365,16 +1403,18 @@ read_subject(SerdReader* const reader,
static SerdStatus
read_labelOrSubject(SerdReader* const reader, SerdNode** const dest)
{
- bool ate_dot = false;
+ SerdStatus st = SERD_SUCCESS;
+ bool ate_dot = false;
+
switch (peek_byte(reader)) {
case '[':
eat_byte_safe(reader, '[');
read_ws_star(reader);
- if (!eat_byte_check(reader, ']')) {
- return SERD_ERR_BAD_SYNTAX;
+ if ((st = eat_byte_check(reader, ']'))) {
+ return st;
}
*dest = blank_id(reader);
- return SERD_SUCCESS;
+ return *dest ? SERD_SUCCESS : SERD_ERR_OVERFLOW;
case '_':
return read_BLANK_NODE_LABEL(reader, dest, &ate_dot);
default:
@@ -1418,12 +1458,17 @@ read_base(SerdReader* const reader, const bool sparql, const bool token)
SerdNode* uri = NULL;
TRY(st, read_IRIREF(reader, &uri));
+
+ if (reader->stack.size + sizeof(SerdNode) > reader->stack.buf_size) {
+ return SERD_ERR_OVERFLOW;
+ }
+
serd_node_zero_pad(uri);
TRY(st, serd_sink_write_base(reader->sink, uri));
read_ws_star(reader);
if (!sparql) {
- return eat_byte_check(reader, '.') ? SERD_SUCCESS : SERD_ERR_BAD_SYNTAX;
+ return eat_byte_check(reader, '.');
}
if (peek_byte(reader) == '.') {
@@ -1451,21 +1496,25 @@ read_prefixID(SerdReader* const reader, const bool sparql, const bool token)
return st;
}
- if (eat_byte_check(reader, ':') != ':') {
- return SERD_ERR_BAD_SYNTAX;
+ if ((st = eat_byte_check(reader, ':'))) {
+ return st;
}
read_ws_star(reader);
SerdNode* uri = NULL;
TRY(st, read_IRIREF(reader, &uri));
+ if (reader->stack.size + sizeof(SerdNode) > reader->stack.buf_size) {
+ return SERD_ERR_OVERFLOW;
+ }
+
serd_node_zero_pad(name);
serd_node_zero_pad(uri);
st = serd_sink_write_prefix(reader->sink, name, uri);
if (!sparql) {
read_ws_star(reader);
- st = eat_byte_check(reader, '.') ? SERD_SUCCESS : SERD_ERR_BAD_SYNTAX;
+ st = eat_byte_check(reader, '.');
}
return st;
}
@@ -1500,8 +1549,9 @@ read_directive(SerdReader* const reader)
static SerdStatus
read_wrappedGraph(SerdReader* const reader, ReadContext* const ctx)
{
- if (!eat_byte_check(reader, '{')) {
- return SERD_ERR_BAD_SYNTAX;
+ SerdStatus st = SERD_SUCCESS;
+ if ((st = eat_byte_check(reader, '{'))) {
+ return st;
}
read_ws_star(reader);
@@ -1510,10 +1560,9 @@ read_wrappedGraph(SerdReader* const reader, ReadContext* const ctx)
bool ate_dot = false;
int s_type = 0;
- ctx->subject = 0;
- SerdStatus st = read_subject(reader, *ctx, &ctx->subject, &s_type);
- if (st) {
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad subject\n");
+ ctx->subject = 0;
+ if ((st = read_subject(reader, *ctx, &ctx->subject, &s_type))) {
+ return r_err(reader, st, "expected subject\n");
}
if (read_triples(reader, *ctx, &ate_dot) && s_type != '[') {
@@ -1616,8 +1665,7 @@ read_n3_statement(SerdReader* const reader)
return st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX;
} else if (!ate_dot) {
read_ws_star(reader);
- st = (eat_byte_check(reader, '.') == '.') ? SERD_SUCCESS
- : SERD_ERR_BAD_SYNTAX;
+ st = eat_byte_check(reader, '.');
}
break;
}
@@ -1696,9 +1744,7 @@ read_nquadsDoc(SerdReader* const reader)
// Terminating '.'
read_ws_star(reader);
- if (!eat_byte_check(reader, '.')) {
- return SERD_ERR_BAD_SYNTAX;
- }
+ TRY(st, eat_byte_check(reader, '.'));
}
TRY(st, emit_statement(reader, ctx, ctx.object));
diff --git a/src/reader.c b/src/reader.c
index 1677ed92..354ff478 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -124,6 +124,10 @@ emit_statement(SerdReader* const reader,
graph = reader->default_graph;
}
+ if (reader->stack.size + (2 * sizeof(SerdNode)) > reader->stack.buf_size) {
+ return SERD_ERR_OVERFLOW;
+ }
+
/* Zero the pad of the object node on the top of the stack. Lower nodes
(subject and predicate) were already zeroed by subsequent pushes. */
serd_node_zero_pad(o);
diff --git a/src/reader.h b/src/reader.h
index 9f48601d..632d5257 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -120,26 +120,31 @@ eat_byte_safe(SerdReader* reader, const int byte)
return c;
}
-static inline int
+static inline SerdStatus
eat_byte_check(SerdReader* reader, const int byte)
{
const int c = peek_byte(reader);
if (c != byte) {
- r_err(reader, SERD_ERR_BAD_SYNTAX, "expected `%c', not `%c'\n", byte, c);
- return 0;
+ return r_err(
+ reader, SERD_ERR_BAD_SYNTAX, "expected `%c', not `%c'\n", byte, c);
}
- return eat_byte_safe(reader, byte);
+
+ eat_byte_safe(reader, byte);
+ return SERD_SUCCESS;
}
static inline SerdStatus
eat_string(SerdReader* reader, const char* str, unsigned n)
{
+ SerdStatus st = SERD_SUCCESS;
+
for (unsigned i = 0; i < n; ++i) {
- if (!eat_byte_check(reader, ((const uint8_t*)str)[i])) {
- return SERD_ERR_BAD_SYNTAX;
+ if ((st = eat_byte_check(reader, ((const uint8_t*)str)[i]))) {
+ return st;
}
}
- return SERD_SUCCESS;
+
+ return st;
}
static inline SerdStatus
diff --git a/test/meson.build b/test/meson.build
index 3ec9d38b..a57e2cf2 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -8,6 +8,7 @@ unit_tests = [
'env',
'free_null',
'node',
+ 'overflow',
'read_chunk',
'reader_writer',
'sink',
diff --git a/test/test_overflow.c b/test/test_overflow.c
new file mode 100644
index 00000000..13516388
--- /dev/null
+++ b/test/test_overflow.c
@@ -0,0 +1,163 @@
+/*
+ Copyright 2018 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#undef NDEBUG
+
+#include "serd/serd.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+static const size_t min_stack_size = 4 * sizeof(size_t) + 256u;
+static const size_t max_stack_size = 1024u;
+
+static SerdStatus
+test_size(const char* const str,
+ const SerdSyntax syntax,
+ const size_t stack_size)
+{
+ SerdSink* sink = serd_sink_new(NULL, NULL);
+ SerdReader* const reader = serd_reader_new(syntax, sink, stack_size);
+ assert(reader);
+
+ serd_reader_start_string(reader, str);
+ const SerdStatus st = serd_reader_read_document(reader);
+ serd_reader_free(reader);
+ serd_sink_free(sink);
+
+ return st;
+}
+
+static void
+test_all_sizes(const char* const str, const SerdSyntax syntax)
+{
+ // Ensure reading with the maximum stack size succeeds
+ SerdStatus st = test_size(str, syntax, max_stack_size);
+ assert(!st);
+
+ // Test with an increasingly smaller stack
+ for (size_t size = max_stack_size; size > min_stack_size; --size) {
+ if ((st = test_size(str, syntax, size))) {
+ assert(st == SERD_ERR_OVERFLOW);
+ }
+ }
+
+ assert(st == SERD_ERR_OVERFLOW);
+}
+
+static void
+test_ntriples_overflow(void)
+{
+ static const char* const test_strings[] = {
+ "<http://example.org/s> <http://example.org/p> <http://example.org/o> .",
+ NULL,
+ };
+
+ for (const char* const* t = test_strings; *t; ++t) {
+ test_all_sizes(*t, SERD_NTRIPLES);
+ }
+}
+
+static void
+test_turtle_overflow(void)
+{
+ static const char* const test_strings[] = {
+ "<http://example.org/s> <http://example.org/p> <http://example.org/> .",
+ "<http://example.org/s> <http://example.org/p> "
+ "<thisisanabsurdlylongurischeme://because/testing/> .",
+ "<http://example.org/s> <http://example.org/p> 1234 .",
+ "<http://example.org/s> <http://example.org/p> (1 2 3 4) .",
+ "<http://example.org/s> <http://example.org/p> ((((((((42)))))))) .",
+ "<http://example.org/s> <http://example.org/p> \"literal\" .",
+ "<http://example.org/s> <http://example.org/p> _:blank .",
+ "<http://example.org/s> <http://example.org/p> true .",
+ "<http://example.org/s> <http://example.org/p> \"\"@en .",
+ "(((((((((42))))))))) <http://example.org/p> <http://example.org/o> .",
+ "@prefix eg: <http://example.org/ns/test> .",
+ "@base <http://example.org/base> .",
+
+ "@prefix eg: <http://example.org/> . \neg:s eg:p eg:o .\n",
+
+ "@prefix ug.dot: <http://example.org/> . \nug.dot:s ug.dot:p ug.dot:o .\n",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix øøøøøøøøø: <http://example.org/long> . \n"
+ "<http://example.org/somewhatlongsubjecttooffsetthepredicate> øøøøøøøøø:p "
+ "øøøøøøøøø:o .\n",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "<http://example.org/subject/with/a/long/path> "
+ "<http://example.org/predicate/with/a/long/path> "
+ "<http://example.org/object/with/a/long/path> .",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "<http://example.org/s> <http://example.org/p> "
+ "\"typed\"^^<http://example.org/Datatype> .",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix eg: <http://example.org/ns/test> .\n"
+ "<http://example.org/s> <http://example.org/p> "
+ "\"typed\"^^eg:Datatype .",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix eg: <http://example.org/ns/test> .\n"
+ "<http://example.org/s> <http://example.org/p> eg:foo .",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix prefix: <http://example.org/testing/curies> .\n"
+ "prefix:subject prefix:predicate prefix:object .\n",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix prefix: <http://example.org/testing/curies> .\n"
+ "prefix:subjectthatwillcomearoundtobeingfinishedanycharacternow "
+ "prefix:predicate prefix:object .\n",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix eg: <http://example.org/> .\n"
+ "eg:s eg:p [ eg:p [ eg:p [ eg:p [ eg:p eg:o ] ] ] ] .\n",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix eg: <http://example.org/> .\n"
+ "eg:s eg:p ( 1 2 3 ( 4 5 6 ( 7 8 9 ) ) ) .\n",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix eg: <http://example.org/ns/test> .\n"
+ "<http://example.org/s> <http://example.org/p> eg:%99 .",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix øøøøøøøøø: <http://example.org/long> .\n"
+ "<http://example.org/somewhatlongsubjecttooffsetthepredicate> øøøøøøøøø:p "
+ "øøøøøøøøø:o .\n",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@base <http://example.org/ns/test> .\n"
+ "<http://example.org/s> <http://example.org/p> <rel> .",
+
+ NULL,
+ };
+
+ for (const char* const* t = test_strings; *t; ++t) {
+ test_all_sizes(*t, SERD_TURTLE);
+ }
+}
+
+int
+main(void)
+{
+ test_ntriples_overflow();
+ test_turtle_overflow();
+ return 0;
+}