aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/n3.c281
-rw-r--r--src/reader.c4
-rw-r--r--src/reader.h19
-rw-r--r--test/meson.build1
-rw-r--r--test/test_overflow.c152
5 files changed, 320 insertions, 137 deletions
diff --git a/src/n3.c b/src/n3.c
index f0ef162a..09f00c54 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -210,8 +210,8 @@ read_utf8_character(SerdReader* const reader,
uint8_t bytes[4] = {0, 0, 0, 0};
SerdStatus st = read_utf8_bytes(reader, bytes, &size, c);
if (st) {
- push_bytes(reader, dest, replacement_char, 3);
- return st;
+ const SerdStatus rst = push_bytes(reader, dest, replacement_char, 3);
+ return rst ? rst : st;
}
return push_bytes(reader, dest, bytes, size);
@@ -227,8 +227,8 @@ read_utf8_code(SerdReader* const reader,
uint8_t bytes[4] = {0, 0, 0, 0};
SerdStatus st = read_utf8_bytes(reader, bytes, &size, c);
if (st) {
- push_bytes(reader, dest, replacement_char, 3);
- return st;
+ const SerdStatus rst = push_bytes(reader, dest, replacement_char, 3);
+ return rst ? rst : st;
}
if (!(st = push_bytes(reader, dest, bytes, size))) {
@@ -361,8 +361,9 @@ read_STRING_LITERAL_LONG(SerdReader* const reader,
st = read_string_escape(reader, ref);
} else {
ref->flags |= SERD_HAS_QUOTE;
- push_byte(reader, ref, c);
- st = read_character(reader, ref, (uint8_t)q2);
+ if (!(st = push_byte(reader, ref, c))) {
+ st = read_character(reader, ref, (uint8_t)q2);
+ }
}
} else if (c == EOF) {
st = r_err(reader, SERD_BAD_SYNTAX, "end of file in long string\n");
@@ -404,7 +405,7 @@ read_STRING_LITERAL(SerdReader* const reader,
}
}
- return eat_byte_check(reader, q) ? SERD_SUCCESS : SERD_BAD_SYNTAX;
+ return st ? st : eat_byte_check(reader, q);
}
static SerdStatus
@@ -466,7 +467,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, SerdNode* const dest)
}
skip_byte(reader, c);
- read_utf8_code(reader, dest, &code, (uint8_t)c);
+ TRY(st, read_utf8_code(reader, dest, &code, (uint8_t)c));
if (!is_PN_CHARS_BASE(code)) {
r_err(reader, SERD_BAD_SYNTAX, "invalid character U+%04X in name\n", code);
@@ -514,15 +515,22 @@ read_PN_CHARS(SerdReader* const reader, SerdNode* const dest)
static SerdStatus
read_PERCENT(SerdReader* const reader, SerdNode* const dest)
{
- push_byte(reader, dest, eat_byte_safe(reader, '%'));
+ SerdStatus st = push_byte(reader, dest, eat_byte_safe(reader, '%'));
+ if (st) {
+ return st;
+ }
+
const uint8_t h1 = read_HEX(reader);
const uint8_t h2 = read_HEX(reader);
- if (h1 && h2) {
- push_byte(reader, dest, h1);
- return push_byte(reader, dest, h2);
+ if (!h1 || !h2) {
+ return SERD_BAD_SYNTAX;
}
- return SERD_BAD_SYNTAX;
+ if (!(st = push_byte(reader, dest, h1))) {
+ st = push_byte(reader, dest, h2);
+ }
+
+ return st;
}
static SerdStatus
@@ -552,13 +560,12 @@ read_PN_LOCAL_ESC(SerdReader* const reader, SerdNode* const dest)
case '@':
case '_':
case '~':
- push_byte(reader, dest, eat_byte_safe(reader, c));
- break;
+ return push_byte(reader, dest, eat_byte_safe(reader, c));
default:
- return r_err(reader, SERD_BAD_SYNTAX, "invalid escape\n");
+ break;
}
- return SERD_SUCCESS;
+ return r_err(reader, SERD_BAD_SYNTAX, "invalid escape\n");
}
static SerdStatus
@@ -596,7 +603,7 @@ read_PN_LOCAL(SerdReader* const reader,
case '9':
case ':':
case '_':
- push_byte(reader, dest, eat_byte_safe(reader, c));
+ st = push_byte(reader, dest, eat_byte_safe(reader, c));
break;
default:
if ((st = read_PLX(reader, dest)) > SERD_FAILURE) {
@@ -610,9 +617,9 @@ read_PN_LOCAL(SerdReader* const reader,
while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.' | ':')*
if (c == '.' || c == ':') {
- push_byte(reader, dest, eat_byte_safe(reader, c));
+ st = push_byte(reader, dest, eat_byte_safe(reader, c));
} else if ((st = read_PLX(reader, dest)) > SERD_FAILURE) {
- return r_err(reader, SERD_BAD_SYNTAX, "bad escape\n");
+ return r_err(reader, st, "bad escape\n");
} else if (st != SERD_SUCCESS && (st = read_PN_CHARS(reader, dest))) {
break;
}
@@ -644,12 +651,15 @@ read_PN_PREFIX_tail(SerdReader* const reader, SerdNode* const dest)
}
if (st <= SERD_FAILURE &&
- serd_node_string(dest)[serd_node_length(dest) - 1] == '.' &&
- read_PN_CHARS(reader, dest)) {
- return r_err(reader, SERD_BAD_SYNTAX, "prefix ends with '.'\n");
+ serd_node_string(dest)[serd_node_length(dest) - 1] == '.') {
+ if ((st = read_PN_CHARS(reader, dest))) {
+ return r_err(reader,
+ st > SERD_FAILURE ? st : SERD_BAD_SYNTAX,
+ "prefix ends with '.'\n");
+ }
}
- return st > SERD_FAILURE ? st : SERD_SUCCESS;
+ return st;
}
static SerdStatus
@@ -695,6 +705,7 @@ read_IRIREF_scheme(SerdReader* const reader, SerdNode* const dest)
return r_err(reader, SERD_BAD_SYNTAX, "bad IRI scheme start '%c'\n", c);
}
+ SerdStatus st = SERD_SUCCESS;
while ((c = peek_byte(reader)) != EOF) {
if (c == '>') {
return r_err(reader, SERD_BAD_SYNTAX, "missing IRI scheme\n");
@@ -708,30 +719,33 @@ read_IRIREF_scheme(SerdReader* const reader, SerdNode* const dest)
(char)c);
}
- push_byte(reader, dest, eat_byte_safe(reader, c));
+ if ((st = push_byte(reader, dest, eat_byte_safe(reader, c)))) {
+ return st;
+ }
+
if (c == ':') {
return SERD_SUCCESS; // End of scheme
}
}
- return r_err(reader, SERD_BAD_SYNTAX, "unexpected end of file\n");
+ return SERD_FAILURE;
}
static SerdStatus
read_IRIREF(SerdReader* const reader, SerdNode** const dest)
{
- if (!eat_byte_check(reader, '<')) {
- return SERD_BAD_SYNTAX;
- }
+ SerdStatus st = SERD_SUCCESS;
+ TRY(st, eat_byte_check(reader, '<'));
- *dest = push_node(reader, SERD_URI, "", 0);
+ if (!(*dest = push_node(reader, SERD_URI, "", 0))) {
+ return SERD_BAD_STACK;
+ }
- if (!fancy_syntax(reader) && read_IRIREF_scheme(reader, *dest)) {
- return r_err(reader, SERD_BAD_SYNTAX, "expected IRI scheme\n");
+ if (!fancy_syntax(reader) && (st = read_IRIREF_scheme(reader, *dest))) {
+ return r_err(reader, st, "expected IRI scheme\n");
}
- SerdStatus st = SERD_SUCCESS;
- uint32_t code = 0;
+ uint32_t code = 0;
while (st <= SERD_FAILURE) {
const int c = eat_byte_safe(reader, peek_byte(reader));
switch (c) {
@@ -773,10 +787,11 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest)
break;
}
- st = SERD_FAILURE;
- push_byte(reader, *dest, c);
+ if (!(st = push_byte(reader, *dest, c))) {
+ st = SERD_FAILURE;
+ }
} else if (!(c & 0x80)) {
- push_byte(reader, *dest, c);
+ st = push_byte(reader, *dest, c);
} else if (read_utf8_character(reader, *dest, (uint8_t)c)) {
if (reader->strict) {
return SERD_BAD_SYNTAX;
@@ -803,8 +818,7 @@ read_PrefixedName(SerdReader* const reader,
return SERD_FAILURE;
}
- push_byte(reader, dest, eat_byte_safe(reader, ':'));
-
+ TRY(st, push_byte(reader, dest, eat_byte_safe(reader, ':')));
TRY_FAILING(st, read_PN_LOCAL(reader, dest, ate_dot));
return SERD_SUCCESS;
}
@@ -822,7 +836,7 @@ read_0_9(SerdReader* const reader, SerdNode* const str, const bool at_least_one)
return r_err(reader, SERD_BAD_SYNTAX, "expected digit\n");
}
- return SERD_SUCCESS;
+ return st;
}
static SerdStatus
@@ -844,13 +858,13 @@ read_number(SerdReader* const reader,
}
if (c == '-' || c == '+') {
- push_byte(reader, *dest, eat_byte_safe(reader, c));
+ TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c)));
}
if ((c = peek_byte(reader)) == '.') {
has_decimal = true;
- // decimal case 2 (e.g. '.0' or '-.0' or '+.0')
- push_byte(reader, *dest, eat_byte_safe(reader, c));
+ // decimal case 2 (e.g. ".0" or "-.0" or "+.0")
+ TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c)));
TRY(st, read_0_9(reader, *dest, true));
} else {
// all other cases ::= ( '-' | '+' ) [0-9]+ ( . )? ( [0-9]+ )? ...
@@ -866,7 +880,7 @@ read_number(SerdReader* const reader,
return SERD_SUCCESS; // Next byte is not a number character
}
- push_byte(reader, *dest, '.');
+ TRY(st, push_byte(reader, *dest, '.'));
read_0_9(reader, *dest, false);
}
}
@@ -876,11 +890,11 @@ read_number(SerdReader* const reader,
c = peek_byte(reader);
if (c == 'e' || c == 'E') {
// double
- push_byte(reader, *dest, eat_byte_safe(reader, c));
+ TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c)));
switch ((c = peek_byte(reader))) {
case '+':
case '-':
- push_byte(reader, *dest, eat_byte_safe(reader, c));
+ TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c)));
break;
default:
break;
@@ -906,10 +920,9 @@ read_iri(SerdReader* const reader, SerdNode** const dest, bool* const ate_dot)
case '<':
return read_IRIREF(reader, dest);
default:
- if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) {
- return SERD_BAD_STACK;
- }
- return read_PrefixedName(reader, *dest, true, ate_dot);
+ *dest = push_node(reader, SERD_CURIE, "", 0);
+ return *dest ? read_PrefixedName(reader, *dest, true, ate_dot)
+ : SERD_BAD_STACK;
}
}
@@ -918,11 +931,12 @@ read_literal(SerdReader* const reader,
SerdNode** const dest,
bool* const ate_dot)
{
- *dest = push_node(reader, SERD_LITERAL, "", 0);
+ if (!(*dest = push_node(reader, SERD_LITERAL, "", 0))) {
+ return SERD_BAD_STACK;
+ }
SerdStatus st = read_String(reader, *dest);
if (st) {
- *dest = NULL;
return st;
}
@@ -931,20 +945,13 @@ read_literal(SerdReader* const reader,
case '@':
skip_byte(reader, '@');
(*dest)->flags |= SERD_HAS_LANGUAGE;
- if ((st = read_LANGTAG(reader))) {
- return r_err(reader, st, "bad literal\n");
- }
+ TRY(st, read_LANGTAG(reader));
break;
case '^':
skip_byte(reader, '^');
- if (!eat_byte_check(reader, '^')) {
- return r_err(reader, SERD_BAD_SYNTAX, "expected '^'\n");
- }
-
+ TRY(st, eat_byte_check(reader, '^'));
(*dest)->flags |= SERD_HAS_DATATYPE;
- if ((st = read_iri(reader, &datatype, ate_dot))) {
- return r_err(reader, st, "bad datatype\n");
- }
+ TRY(st, read_iri(reader, &datatype, ate_dot));
break;
}
return SERD_SUCCESS;
@@ -965,22 +972,26 @@ read_verb(SerdReader* const reader, SerdNode** const dest)
return SERD_BAD_STACK;
}
- SerdStatus st = read_PN_PREFIX(reader, *dest);
- bool ate_dot = false;
- SerdNode* node = *dest;
- const int next = peek_byte(reader);
- if (!st && node->length == 1 && serd_node_string(node)[0] == 'a' &&
- next != ':' && !is_PN_CHARS_BASE((uint32_t)next)) {
+ SerdStatus st = read_PN_PREFIX(reader, *dest);
+ if (st > SERD_FAILURE) {
+ return st;
+ }
+
+ bool ate_dot = false;
+ SerdNode* node = *dest;
+ const int next = peek_byte(reader);
+ if (node->length == 1 && serd_node_string(node)[0] == 'a' && next != ':' &&
+ !is_PN_CHARS_BASE((uint32_t)next)) {
serd_stack_pop_to(&reader->stack, orig_stack_size);
- *dest = push_node(reader, SERD_URI, NS_RDF "type", 47);
- return SERD_SUCCESS;
+ return ((*dest = push_node(reader, SERD_URI, NS_RDF "type", 47))
+ ? SERD_SUCCESS
+ : SERD_BAD_STACK);
}
- if (st > SERD_FAILURE ||
- (st = read_PrefixedName(reader, *dest, false, &ate_dot)) || ate_dot) {
+ if ((st = read_PrefixedName(reader, *dest, false, &ate_dot)) || ate_dot) {
*dest = NULL;
- st = st > SERD_FAILURE ? st : SERD_BAD_SYNTAX;
- return r_err(reader, st, "bad verb\n");
+ return r_err(
+ reader, st > SERD_FAILURE ? st : SERD_BAD_SYNTAX, "expected verb\n");
}
return SERD_SUCCESS;
@@ -991,21 +1002,22 @@ read_BLANK_NODE_LABEL(SerdReader* const reader,
SerdNode** const dest,
bool* const ate_dot)
{
- skip_byte(reader, '_');
- if (!eat_byte_check(reader, ':')) {
- return SERD_BAD_SYNTAX;
- }
-
SerdStatus st = SERD_SUCCESS;
- SerdNode* n = *dest = push_node(reader,
- SERD_BLANK,
- reader->bprefix ? reader->bprefix : "",
- reader->bprefix_len);
+ skip_byte(reader, '_');
+ TRY(st, eat_byte_check(reader, ':'));
- int c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9])
+ if (!(*dest = push_node(reader,
+ SERD_BLANK,
+ reader->bprefix ? reader->bprefix : "",
+ reader->bprefix_len))) {
+ return SERD_BAD_STACK;
+ }
+
+ SerdNode* n = *dest;
+ int c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9])
if (is_digit(c) || c == '_') {
- push_byte(reader, n, eat_byte_safe(reader, c));
+ TRY(st, push_byte(reader, n, eat_byte_safe(reader, c)));
} else if ((st = read_PN_CHARS(reader, n))) {
st = st > SERD_FAILURE ? st : SERD_BAD_SYNTAX;
return r_err(reader, st, "invalid name start\n");
@@ -1065,7 +1077,9 @@ read_anon(SerdReader* const reader,
}
if (!*dest) {
- *dest = blank_id(reader);
+ if (!(*dest = blank_id(reader))) {
+ return SERD_BAD_STACK;
+ }
}
// Emit statement with this anonymous object first
@@ -1081,8 +1095,9 @@ read_anon(SerdReader* const reader,
if (!subject) {
*ctx.flags |= SERD_ANON_CONT;
}
+
bool ate_dot_in_list = false;
- TRY_FAILING(st, read_predicateObjectList(reader, ctx, &ate_dot_in_list));
+ TRY(st, read_predicateObjectList(reader, ctx, &ate_dot_in_list));
if (ate_dot_in_list) {
return r_err(reader, SERD_BAD_SYNTAX, "'.' inside blank\n");
@@ -1092,9 +1107,7 @@ read_anon(SerdReader* const reader,
*ctx.flags = old_flags;
}
- return st > SERD_FAILURE ? st
- : (eat_byte_check(reader, ']') == ']') ? SERD_SUCCESS
- : SERD_BAD_SYNTAX;
+ return st > SERD_FAILURE ? st : eat_byte_check(reader, ']');
}
/* If emit is true: recurses, calling statement_sink for every statement
@@ -1126,6 +1139,7 @@ read_object(SerdReader* const reader,
return r_err(reader, SERD_BAD_SYNTAX, "expected: ':', '<', or '_'\n");
}
}
+
switch (c) {
case EOF:
case ')':
@@ -1142,6 +1156,8 @@ read_object(SerdReader* const reader,
st = read_BLANK_NODE_LABEL(reader, &o, ate_dot);
break;
case '<':
+ st = read_IRIREF(reader, &o);
+ break;
case ':':
st = read_iri(reader, &o, ate_dot);
break;
@@ -1172,21 +1188,26 @@ read_object(SerdReader* const reader,
return SERD_BAD_STACK;
}
- while (!read_PN_CHARS_BASE(reader, o)) {
+ while (!(st = read_PN_CHARS_BASE(reader, o))) {
+ }
+
+ if (st > SERD_FAILURE) {
+ return st;
}
+
if ((o->length == 4 && !memcmp(serd_node_string(o), "true", 4)) ||
(o->length == 5 && !memcmp(serd_node_string(o), "false", 5))) {
o->flags |= SERD_HAS_DATATYPE;
o->type = SERD_LITERAL;
- push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN);
- st = SERD_SUCCESS;
- } else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) {
- st = SERD_BAD_SYNTAX;
- } else {
- if ((st = read_PrefixedName(reader, o, false, ate_dot))) {
- st = st > SERD_FAILURE ? st : SERD_BAD_SYNTAX;
- return r_err(reader, st, "expected prefixed name\n");
+ if (!(push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN))) {
+ st = SERD_BAD_STACK;
+ } else {
+ st = SERD_SUCCESS;
}
+ } else if ((st = read_PN_PREFIX_tail(reader, o)) > SERD_FAILURE ||
+ (st = read_PrefixedName(reader, o, false, ate_dot))) {
+ st = (st > SERD_FAILURE) ? st : SERD_BAD_SYNTAX;
+ return r_err(reader, st, "expected prefixed name\n");
}
}
@@ -1270,11 +1291,8 @@ static SerdStatus
end_collection(SerdReader* const reader, ReadContext ctx, const SerdStatus st)
{
*ctx.flags &= ~(unsigned)SERD_LIST_CONT;
- if (!st) {
- return (eat_byte_check(reader, ')') == ')') ? SERD_SUCCESS
- : SERD_BAD_SYNTAX;
- }
- return st;
+
+ return st ? st : eat_byte_check(reader, ')');
}
static SerdStatus
@@ -1287,8 +1305,10 @@ read_collection(SerdReader* const reader,
skip_byte(reader, '(');
bool end = peek_delim(reader, ')');
+ if (!(*dest = end ? reader->rdf_nil : blank_id(reader))) {
+ return SERD_BAD_STACK;
+ }
- *dest = end ? reader->rdf_nil : blank_id(reader);
if (ctx.subject) { // Reading a collection object
*ctx.flags |= (end ? 0 : SERD_LIST_O_BEGIN);
TRY(st, emit_statement(reader, ctx, *dest));
@@ -1327,6 +1347,7 @@ read_collection(SerdReader* const reader,
used and > IDs generated by read_object above. */
if (!rest) {
rest = blank_id(reader); // First pass, push
+ assert(rest); // Can't overflow since read_object() popped
} else {
set_blank_id(reader, rest, genid_size(reader));
}
@@ -1377,16 +1398,16 @@ read_subject(SerdReader* const reader,
static SerdStatus
read_labelOrSubject(SerdReader* const reader, SerdNode** const dest)
{
- bool ate_dot = false;
+ SerdStatus st = SERD_SUCCESS;
+ bool ate_dot = false;
+
switch (peek_byte(reader)) {
case '[':
skip_byte(reader, '[');
read_ws_star(reader);
- if (!eat_byte_check(reader, ']')) {
- return SERD_BAD_SYNTAX;
- }
+ TRY(st, eat_byte_check(reader, ']'));
*dest = blank_id(reader);
- return SERD_SUCCESS;
+ return *dest ? SERD_SUCCESS : SERD_BAD_STACK;
case '_':
return read_BLANK_NODE_LABEL(reader, dest, &ate_dot);
default:
@@ -1430,12 +1451,17 @@ read_base(SerdReader* const reader, const bool sparql, const bool token)
SerdNode* uri = NULL;
TRY(st, read_IRIREF(reader, &uri));
+
+ if (reader->stack.size + sizeof(SerdNode) > reader->stack.buf_size) {
+ return SERD_BAD_STACK;
+ }
+
serd_node_zero_pad(uri);
TRY(st, serd_sink_write_base(reader->sink, uri));
read_ws_star(reader);
if (!sparql) {
- return eat_byte_check(reader, '.') ? SERD_SUCCESS : SERD_BAD_SYNTAX;
+ return eat_byte_check(reader, '.');
}
if (peek_byte(reader) == '.') {
@@ -1461,21 +1487,23 @@ read_prefixID(SerdReader* const reader, const bool sparql, const bool token)
TRY_FAILING(st, read_PN_PREFIX(reader, name));
- if (eat_byte_check(reader, ':') != ':') {
- return SERD_BAD_SYNTAX;
- }
-
+ TRY(st, eat_byte_check(reader, ':'));
read_ws_star(reader);
+
SerdNode* uri = NULL;
TRY(st, read_IRIREF(reader, &uri));
+ if (reader->stack.size + sizeof(SerdNode) > reader->stack.buf_size) {
+ return SERD_BAD_STACK;
+ }
+
serd_node_zero_pad(name);
serd_node_zero_pad(uri);
st = serd_sink_write_prefix(reader->sink, name, uri);
if (!sparql) {
read_ws_star(reader);
- st = eat_byte_check(reader, '.') ? SERD_SUCCESS : SERD_BAD_SYNTAX;
+ st = eat_byte_check(reader, '.');
}
return st;
}
@@ -1510,20 +1538,18 @@ read_directive(SerdReader* const reader)
static SerdStatus
read_wrappedGraph(SerdReader* const reader, ReadContext* const ctx)
{
- if (!eat_byte_check(reader, '{')) {
- return SERD_BAD_SYNTAX;
- }
-
+ SerdStatus st = SERD_SUCCESS;
+ TRY(st, eat_byte_check(reader, '{'));
read_ws_star(reader);
+
while (peek_byte(reader) != '}') {
const size_t orig_stack_size = reader->stack.size;
bool ate_dot = false;
int s_type = 0;
- ctx->subject = 0;
- SerdStatus st = read_subject(reader, *ctx, &ctx->subject, &s_type);
- if (st) {
- return r_err(reader, SERD_BAD_SYNTAX, "bad subject\n");
+ ctx->subject = 0;
+ if ((st = read_subject(reader, *ctx, &ctx->subject, &s_type))) {
+ return r_err(reader, st, "expected subject\n");
}
if ((st = read_triples(reader, *ctx, &ate_dot)) && s_type != '[') {
@@ -1624,8 +1650,7 @@ read_n3_statement(SerdReader* const reader)
return st > SERD_FAILURE ? st : SERD_BAD_SYNTAX;
} else if (!ate_dot) {
read_ws_star(reader);
- st =
- (eat_byte_check(reader, '.') == '.') ? SERD_SUCCESS : SERD_BAD_SYNTAX;
+ st = eat_byte_check(reader, '.');
}
break;
}
@@ -1708,9 +1733,7 @@ read_nquads_statement(SerdReader* const reader)
// Terminating '.'
read_ws_star(reader);
- if (!eat_byte_check(reader, '.')) {
- return SERD_BAD_SYNTAX;
- }
+ TRY(st, eat_byte_check(reader, '.'));
}
TRY(st, emit_statement(reader, ctx, ctx.object));
diff --git a/src/reader.c b/src/reader.c
index fc39d7fa..25c4b3b1 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -114,6 +114,10 @@ emit_statement(SerdReader* const reader,
graph = reader->default_graph;
}
+ if (reader->stack.size + (2 * sizeof(SerdNode)) > reader->stack.buf_size) {
+ return SERD_BAD_STACK;
+ }
+
/* Zero the pad of the object node on the top of the stack. Lower nodes
(subject and predicate) were already zeroed by subsequent pushes. */
serd_node_zero_pad(o);
diff --git a/src/reader.h b/src/reader.h
index 00a41794..c562ac6c 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -7,6 +7,7 @@
#include "byte_source.h"
#include "node.h"
#include "stack.h"
+#include "try.h"
#include "serd/attributes.h"
#include "serd/error.h"
@@ -119,26 +120,28 @@ eat_byte_safe(SerdReader* reader, const int byte)
return byte;
}
-static inline int SERD_NODISCARD
+static inline SerdStatus SERD_NODISCARD
eat_byte_check(SerdReader* reader, const int byte)
{
const int c = peek_byte(reader);
if (c != byte) {
- r_err(reader, SERD_BAD_SYNTAX, "expected '%c', not '%c'\n", byte, c);
- return 0;
+ return r_err(reader, SERD_BAD_SYNTAX, "expected '%c', not '%c'\n", byte, c);
}
- return eat_byte_safe(reader, byte);
+
+ skip_byte(reader, c);
+ return SERD_SUCCESS;
}
static inline SerdStatus
eat_string(SerdReader* reader, const char* str, unsigned n)
{
+ SerdStatus st = SERD_SUCCESS;
+
for (unsigned i = 0; i < n; ++i) {
- if (!eat_byte_check(reader, str[i])) {
- return SERD_BAD_SYNTAX;
- }
+ TRY(st, eat_byte_check(reader, str[i]));
}
- return SERD_SUCCESS;
+
+ return st;
}
static inline SerdStatus
diff --git a/test/meson.build b/test/meson.build
index d9a386c3..ffd65a17 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -122,6 +122,7 @@ unit_tests = [
'env',
'free_null',
'node',
+ 'overflow',
'reader_writer',
'sink',
'string',
diff --git a/test/test_overflow.c b/test/test_overflow.c
new file mode 100644
index 00000000..ac4a490b
--- /dev/null
+++ b/test/test_overflow.c
@@ -0,0 +1,152 @@
+// Copyright 2018 David Robillard <d@drobilla.net>
+// SPDX-License-Identifier: ISC
+
+#undef NDEBUG
+
+#include "serd/serd.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+static const size_t min_stack_size = 4U * sizeof(size_t) + 240U; // Exclusive lower bound of the stack sizes swept by test_all_sizes()
+static const size_t max_stack_size = 1024U; // Starting size of the sweep; every test document must read successfully with it
+
+/**
+   Read `str` as a complete document with a reader limited to `stack_size`.
+
+   A throwaway sink and reader are created for the attempt, and both are
+   released again before returning, on every path.
+
+   @param str Document text to read.
+   @param syntax Syntax to read `str` as.
+   @param stack_size Stack size passed to serd_reader_new().
+
+   @return SERD_BAD_STACK if no reader could be created, otherwise the status
+   returned by serd_reader_read_document().
+*/
+static SerdStatus
+test_size(const char* const str,
+          const SerdSyntax  syntax,
+          const size_t      stack_size)
+{
+  SerdSink* const   sink   = serd_sink_new(NULL, NULL);
+  SerdReader* const reader = serd_reader_new(syntax, sink, stack_size);
+  if (!reader) {
+    // No reader was created, so the sink must still be released here;
+    // returning without this would leak it on every failed attempt
+    serd_sink_free(sink);
+    return SERD_BAD_STACK;
+  }
+
+  serd_reader_start_string(reader, str);
+  const SerdStatus st = serd_reader_read_document(reader);
+
+  // Release in the same order as the original: reader first, then its sink
+  serd_reader_free(reader);
+  serd_sink_free(sink);
+
+  return st;
+}
+
+/**
+   Read `str` at every stack size from `max_stack_size` down to just above
+   `min_stack_size`, asserting that each attempt either succeeds or fails
+   with SERD_BAD_STACK, and that the final (smallest) attempt overflows.
+*/
+static void
+test_all_sizes(const char* const str, const SerdSyntax syntax)
+{
+  // Baseline: the largest stack must be enough to read the whole document
+  SerdStatus st = test_size(str, syntax, max_stack_size);
+  assert(!st);
+
+  // Step the stack size down by one each time; overflow is the only
+  // failure that is tolerated along the way
+  size_t size = max_stack_size;
+  while (size > min_stack_size) {
+    st = test_size(str, syntax, size);
+    if (st) {
+      assert(st == SERD_BAD_STACK);
+    }
+
+    --size;
+  }
+
+  // The status left over from the smallest size tried must be an overflow
+  assert(st == SERD_BAD_STACK);
+}
+
+/// Run the stack size sweep over every N-Triples test document
+static void
+test_ntriples_overflow(void)
+{
+  // NULL-terminated so the loop below needs no separate element count
+  static const char* const documents[] = {
+    "<http://example.org/s> <http://example.org/p> <http://example.org/o> .",
+    NULL,
+  };
+
+  for (size_t i = 0; documents[i]; ++i) {
+    test_all_sizes(documents[i], SERD_NTRIPLES);
+  }
+}
+
+/// Run the stack size sweep over every Turtle test document
+static void
+test_turtle_overflow(void)
+{
+  // NULL-terminated table of documents; adjacent string literals that form a
+  // single document are marked NOLINT so the missing comma is not flagged
+  static const char* const documents[] = {
+    "<http://example.org/s> <http://example.org/p> <http://example.org/> .",
+    "<http://example.org/s> <http://example.org/p> "
+    "<thisisanabsurdlylongurischeme://because/testing/> .",
+    "<http://example.org/s> <http://example.org/p> 1234 .",
+    "<http://example.org/s> <http://example.org/p> (1 2 3 4) .",
+    "<http://example.org/s> <http://example.org/p> ((((((((42)))))))) .",
+    "<http://example.org/s> <http://example.org/p> \"literal\" .",
+    "<http://example.org/s> <http://example.org/p> _:blank .",
+    "<http://example.org/s> <http://example.org/p> true .",
+    "<http://example.org/s> <http://example.org/p> \"\"@en .",
+    "(((((((((42))))))))) <http://example.org/p> <http://example.org/o> .",
+    "@prefix eg: <http://example.org/ns/test> .",
+    "@base <http://example.org/base> .",
+
+    "@prefix eg: <http://example.org/> . \neg:s eg:p eg:o .\n",
+
+    "@prefix ug.dot: <http://example.org/> . \nug.dot:s ug.dot:p ug.dot:o .\n",
+
+    // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+    "@prefix øøøøøøøøø: <http://example.org/long> . \n"
+    "<http://example.org/somewhatlongsubjecttooffsetthepredicate> øøøøøøøøø:p "
+    "øøøøøøøøø:o .\n",
+
+    // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+    "<http://example.org/subject/with/a/long/path> "
+    "<http://example.org/predicate/with/a/long/path> "
+    "<http://example.org/object/with/a/long/path> .",
+
+    // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+    "<http://example.org/s> <http://example.org/p> "
+    "\"typed\"^^<http://example.org/Datatype> .",
+
+    // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+    "@prefix eg: <http://example.org/ns/test> .\n"
+    "<http://example.org/s> <http://example.org/p> "
+    "\"typed\"^^eg:Datatype .",
+
+    // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+    "@prefix eg: <http://example.org/ns/test> .\n"
+    "<http://example.org/s> <http://example.org/p> eg:foo .",
+
+    // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+    "@prefix prefix: <http://example.org/testing/curies> .\n"
+    "prefix:subject prefix:predicate prefix:object .\n",
+
+    // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+    "@prefix prefix: <http://example.org/testing/curies> .\n"
+    "prefix:subjectthatwillcomearoundtobeingfinishedanycharacternow "
+    "prefix:predicate prefix:object .\n",
+
+    // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+    "@prefix eg: <http://example.org/> .\n"
+    "eg:s eg:p [ eg:p [ eg:p [ eg:p [ eg:p eg:o ] ] ] ] .\n",
+
+    // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+    "@prefix eg: <http://example.org/> .\n"
+    "eg:s eg:p ( 1 2 3 ( 4 5 6 ( 7 8 9 ) ) ) .\n",
+
+    // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+    "@prefix eg: <http://example.org/ns/test> .\n"
+    "<http://example.org/s> <http://example.org/p> eg:%99 .",
+
+    // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+    "@prefix øøøøøøøøø: <http://example.org/long> .\n"
+    "<http://example.org/somewhatlongsubjecttooffsetthepredicate> øøøøøøøøø:p "
+    "øøøøøøøøø:o .\n",
+
+    // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+    "@base <http://example.org/ns/test> .\n"
+    "<http://example.org/s> <http://example.org/p> <rel> .",
+
+    NULL,
+  };
+
+  for (size_t i = 0; documents[i]; ++i) {
+    test_all_sizes(documents[i], SERD_TURTLE);
+  }
+}
+
+int
+main(void)
+{ // Test entry point: NDEBUG is undefined above, so any failed assert() in a suite aborts
+ test_ntriples_overflow(); // Sweep stack sizes over the N-Triples documents
+ test_turtle_overflow(); // Sweep stack sizes over the Turtle documents
+ return 0; // Reached only if every assertion held
+}