diff options
author | David Robillard <d@drobilla.net> | 2020-08-15 20:11:19 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2022-01-13 23:03:37 -0500 |
commit | a10fddf0f697e78325ddcfbc71af8f154ffd2a82 (patch) | |
tree | 75c2dd20aef99bff82dc81c93ab29b81313a9c8e | |
parent | 0839a0fda214250c41c07c799c700f4432e54963 (diff) | |
download | serd-a10fddf0f697e78325ddcfbc71af8f154ffd2a82.tar.gz serd-a10fddf0f697e78325ddcfbc71af8f154ffd2a82.tar.bz2 serd-a10fddf0f697e78325ddcfbc71af8f154ffd2a82.zip |
Improve reader error handling
-rw-r--r-- | src/n3.c | 294 | ||||
-rw-r--r-- | src/reader.c | 4 | ||||
-rw-r--r-- | src/reader.h | 19 | ||||
-rw-r--r-- | test/meson.build | 1 | ||||
-rw-r--r-- | test/test_overflow.c | 163 |
5 files changed, 350 insertions, 131 deletions
@@ -231,8 +231,8 @@ read_utf8_character(SerdReader* const reader, uint8_t bytes[4] = {0, 0, 0, 0}; SerdStatus st = read_utf8_bytes(reader, bytes, &size, c); if (st) { - push_bytes(reader, dest, replacement_char, 3); - return st; + const SerdStatus rst = push_bytes(reader, dest, replacement_char, 3); + return rst ? rst : st; } return push_bytes(reader, dest, bytes, size); @@ -248,8 +248,8 @@ read_utf8_code(SerdReader* const reader, uint8_t bytes[4] = {0, 0, 0, 0}; SerdStatus st = read_utf8_bytes(reader, bytes, &size, c); if (st) { - push_bytes(reader, dest, replacement_char, 3); - return st; + const SerdStatus rst = push_bytes(reader, dest, replacement_char, 3); + return rst ? rst : st; } if (!(st = push_bytes(reader, dest, bytes, size))) { @@ -368,8 +368,9 @@ read_STRING_LITERAL_LONG(SerdReader* const reader, break; } ref->flags |= SERD_HAS_QUOTE; - push_byte(reader, ref, c); - st = read_character(reader, ref, (uint8_t)q2); + if (!(st = push_byte(reader, ref, c))) { + st = read_character(reader, ref, (uint8_t)q2); + } } else if (c == EOF) { return r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in long string\n"); } else { @@ -408,17 +409,14 @@ read_STRING_LITERAL(SerdReader* const reader, break; default: if (c == q) { - eat_byte_check(reader, q); - return SERD_SUCCESS; + return eat_byte_check(reader, q); } else { st = read_character(reader, ref, (uint8_t)eat_byte_safe(reader, c)); } } } - return st ? st - : eat_byte_check(reader, q) ? SERD_SUCCESS - : SERD_ERR_BAD_SYNTAX; + return st ? st : eat_byte_check(reader, q); } static SerdStatus @@ -473,7 +471,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, SerdNode* const dest) const int c = peek_byte(reader); SerdStatus st = SERD_SUCCESS; if (is_alpha(c)) { - push_byte(reader, dest, eat_byte_safe(reader, c)); + st = push_byte(reader, dest, eat_byte_safe(reader, c)); } else if (c == EOF || !(c & 0x80)) { return SERD_FAILURE; } else if ((st = read_utf8_code( @@ -503,7 +501,7 @@ read_PN_CHARS(SerdReader* const reader, SerdNode* const dest) const int c = peek_byte(reader); SerdStatus st = SERD_SUCCESS; if (is_alpha(c) || is_digit(c) || c == '_' || c == '-') { - push_byte(reader, dest, eat_byte_safe(reader, c)); + st = push_byte(reader, dest, eat_byte_safe(reader, c)); } else if (c == EOF || !(c & 0x80)) { return SERD_FAILURE; } else if ((st = read_utf8_code( @@ -519,15 +517,22 @@ read_PN_CHARS(SerdReader* const reader, SerdNode* const dest) static SerdStatus read_PERCENT(SerdReader* const reader, SerdNode* const dest) { - push_byte(reader, dest, eat_byte_safe(reader, '%')); + SerdStatus st = push_byte(reader, dest, eat_byte_safe(reader, '%')); + if (st) { + return st; + } + const uint8_t h1 = read_HEX(reader); const uint8_t h2 = read_HEX(reader); - if (h1 && h2) { - push_byte(reader, dest, h1); - return push_byte(reader, dest, h2); + if (!h1 || !h2) { + return SERD_ERR_BAD_SYNTAX; + } + + if (!(st = push_byte(reader, dest, h1))) { + st = push_byte(reader, dest, h2); } - return SERD_ERR_BAD_SYNTAX; + return st; } static SerdStatus @@ -557,13 +562,12 @@ read_PN_LOCAL_ESC(SerdReader* const reader, SerdNode* const dest) case '@': case '_': case '~': - push_byte(reader, dest, eat_byte_safe(reader, c)); - break; + return push_byte(reader, dest, eat_byte_safe(reader, c)); default: - return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n"); + break; } - return SERD_SUCCESS; + return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n"); } static SerdStatus @@ -601,7 +605,7 @@ read_PN_LOCAL(SerdReader* const reader, case '9': case ':': case '_': - push_byte(reader, dest, eat_byte_safe(reader, c)); + st = push_byte(reader, dest, eat_byte_safe(reader, c)); break; default: if ((st = read_PLX(reader, dest)) > SERD_FAILURE) { @@ -613,9 +617,9 @@ read_PN_LOCAL(SerdReader* const reader, while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.' | ':')* if (c == '.' || c == ':') { - push_byte(reader, dest, eat_byte_safe(reader, c)); + st = push_byte(reader, dest, eat_byte_safe(reader, c)); } else if ((st = read_PLX(reader, dest)) > SERD_FAILURE) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad escape\n"); + return r_err(reader, st, "bad escape\n"); } else if (st != SERD_SUCCESS && (st = read_PN_CHARS(reader, dest))) { break; } @@ -647,22 +651,27 @@ read_PN_PREFIX_tail(SerdReader* const reader, SerdNode* const dest) } if (st <= SERD_FAILURE && - serd_node_string(dest)[serd_node_length(dest) - 1] == '.' && - read_PN_CHARS(reader, dest)) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "prefix ends with `.'\n"); + serd_node_string(dest)[serd_node_length(dest) - 1] == '.') { + if ((st = read_PN_CHARS(reader, dest))) { + return r_err(reader, + st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX, + "prefix ends with `.'\n"); + } } - return st > SERD_FAILURE ? st : SERD_SUCCESS; + return st; } static SerdStatus read_PN_PREFIX(SerdReader* const reader, SerdNode* const dest) { - if (!read_PN_CHARS_BASE(reader, dest)) { + SerdStatus st = SERD_SUCCESS; + + if (!(st = read_PN_CHARS_BASE(reader, dest))) { return read_PN_PREFIX_tail(reader, dest); } - return SERD_FAILURE; + return st; } static SerdStatus @@ -700,6 +709,7 @@ read_IRIREF_scheme(SerdReader* const reader, SerdNode* const dest) return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad IRI scheme start `%c'\n", c); } + SerdStatus st = SERD_SUCCESS; while ((c = peek_byte(reader)) != EOF) { if (c == '>') { return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing IRI scheme\n"); @@ -713,30 +723,35 @@ read_IRIREF_scheme(SerdReader* const reader, SerdNode* const dest) (char)c); } - push_byte(reader, dest, eat_byte_safe(reader, c)); + if ((st = push_byte(reader, dest, eat_byte_safe(reader, c)))) { + return st; + } + if (c == ':') { return SERD_SUCCESS; // End of scheme } } - return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n"); + return SERD_FAILURE; } static SerdStatus read_IRIREF(SerdReader* const reader, SerdNode** const dest) { - if (!eat_byte_check(reader, '<')) { - return SERD_ERR_BAD_SYNTAX; + SerdStatus st = SERD_SUCCESS; + if ((st = eat_byte_check(reader, '<'))) { + return st; } - *dest = push_node(reader, SERD_URI, "", 0); + if (!(*dest = push_node(reader, SERD_URI, "", 0))) { + return SERD_ERR_OVERFLOW; + } - if (!fancy_syntax(reader) && read_IRIREF_scheme(reader, *dest)) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected IRI scheme\n"); + if (!fancy_syntax(reader) && (st = read_IRIREF_scheme(reader, *dest))) { + return r_err(reader, st, "expected IRI scheme\n"); } - SerdStatus st = SERD_SUCCESS; - uint32_t code = 0; + uint32_t code = 0; while (st <= SERD_FAILURE) { const int c = eat_byte_safe(reader, peek_byte(reader)); switch (c) { @@ -780,10 +795,11 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest) break; } - st = SERD_FAILURE; - push_byte(reader, *dest, c); + if (!(st = push_byte(reader, *dest, c))) { + st = SERD_FAILURE; + } } else if (!(c & 0x80)) { - push_byte(reader, *dest, c); + st = push_byte(reader, *dest, c); } else if (read_utf8_character(reader, *dest, (uint8_t)c)) { if (reader->strict) { return SERD_ERR_BAD_SYNTAX; @@ -810,11 +826,12 @@ read_PrefixedName(SerdReader* const reader, return SERD_FAILURE; } - push_byte(reader, dest, eat_byte_safe(reader, ':')); - - st = read_PN_LOCAL(reader, dest, ate_dot); + if ((st = push_byte(reader, dest, eat_byte_safe(reader, ':'))) || + (st = read_PN_LOCAL(reader, dest, ate_dot)) > SERD_FAILURE) { + return st; + } - return (st > SERD_FAILURE) ? st : SERD_SUCCESS; + return SERD_SUCCESS; } static SerdStatus @@ -830,7 +847,7 @@ read_0_9(SerdReader* const reader, SerdNode* const str, const bool at_least_one) return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected digit\n"); } - return SERD_SUCCESS; + return st; } static SerdStatus @@ -852,13 +869,13 @@ read_number(SerdReader* const reader, } if (c == '-' || c == '+') { - push_byte(reader, *dest, eat_byte_safe(reader, c)); + TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c))); } if ((c = peek_byte(reader)) == '.') { has_decimal = true; // decimal case 2 (e.g. '.0' or `-.0' or `+.0') - push_byte(reader, *dest, eat_byte_safe(reader, c)); + TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c))); TRY(st, read_0_9(reader, *dest, true)); } else { // all other cases ::= ( '-' | '+' ) [0-9]+ ( . )? ( [0-9]+ )? ... @@ -874,18 +891,18 @@ read_number(SerdReader* const reader, return SERD_SUCCESS; // Next byte is not a number character } - push_byte(reader, *dest, '.'); + TRY(st, push_byte(reader, *dest, '.')); read_0_9(reader, *dest, false); } } c = peek_byte(reader); if (c == 'e' || c == 'E') { // double - push_byte(reader, *dest, eat_byte_safe(reader, c)); + TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c))); switch ((c = peek_byte(reader))) { case '+': case '-': - push_byte(reader, *dest, eat_byte_safe(reader, c)); + TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c))); break; default: break; @@ -911,10 +928,9 @@ read_iri(SerdReader* const reader, SerdNode** const dest, bool* const ate_dot) case '<': return read_IRIREF(reader, dest); default: - if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) { - return SERD_ERR_OVERFLOW; - } - return read_PrefixedName(reader, *dest, true, ate_dot); + *dest = push_node(reader, SERD_CURIE, "", 0); + return *dest ? read_PrefixedName(reader, *dest, true, ate_dot) + : SERD_ERR_OVERFLOW; } } @@ -923,11 +939,12 @@ read_literal(SerdReader* const reader, SerdNode** const dest, bool* const ate_dot) { - *dest = push_node(reader, SERD_LITERAL, "", 0); + if (!(*dest = push_node(reader, SERD_LITERAL, "", 0))) { + return SERD_ERR_OVERFLOW; + } SerdStatus st = read_String(reader, *dest); if (st) { - *dest = NULL; return st; } @@ -936,17 +953,13 @@ read_literal(SerdReader* const reader, case '@': eat_byte_safe(reader, '@'); (*dest)->flags |= SERD_HAS_LANGUAGE; - if ((st = read_LANGTAG(reader))) { - return r_err(reader, st, "bad literal\n"); - } + TRY(st, read_LANGTAG(reader)); break; case '^': eat_byte_safe(reader, '^'); - eat_byte_check(reader, '^'); + TRY(st, eat_byte_check(reader, '^')); (*dest)->flags |= SERD_HAS_DATATYPE; - if ((st = read_iri(reader, &datatype, ate_dot))) { - return r_err(reader, st, "bad literal\n"); - } + TRY(st, read_iri(reader, &datatype, ate_dot)); break; } return SERD_SUCCESS; @@ -967,21 +980,26 @@ read_verb(SerdReader* const reader, SerdNode** const dest) return SERD_ERR_OVERFLOW; } - SerdStatus st = read_PN_PREFIX(reader, *dest); - bool ate_dot = false; - SerdNode* node = *dest; - const int next = peek_byte(reader); - if (!st && node->length == 1 && serd_node_string(node)[0] == 'a' && - next != ':' && !is_PN_CHARS_BASE((uint32_t)next)) { + SerdStatus st = read_PN_PREFIX(reader, *dest); + if (st > SERD_FAILURE) { + return st; + } + + bool ate_dot = false; + SerdNode* node = *dest; + const int next = peek_byte(reader); + if (node->length == 1 && serd_node_string(node)[0] == 'a' && next != ':' && + !is_PN_CHARS_BASE((uint32_t)next)) { serd_stack_pop_to(&reader->stack, orig_stack_size); - *dest = push_node(reader, SERD_URI, NS_RDF "type", 47); - return SERD_SUCCESS; + return ((*dest = push_node(reader, SERD_URI, NS_RDF "type", 47)) + ? SERD_SUCCESS + : SERD_ERR_OVERFLOW); } - if (st > SERD_FAILURE || read_PrefixedName(reader, *dest, false, &ate_dot) || - ate_dot) { + if ((st = read_PrefixedName(reader, *dest, false, &ate_dot)) || ate_dot) { *dest = NULL; - return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad verb\n"); + return r_err( + reader, st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX, "expected verb\n"); } return SERD_SUCCESS; @@ -992,19 +1010,22 @@ read_BLANK_NODE_LABEL(SerdReader* const reader, SerdNode** const dest, bool* const ate_dot) { - eat_byte_safe(reader, '_'); - eat_byte_check(reader, ':'); - SerdStatus st = SERD_SUCCESS; - SerdNode* n = *dest = push_node(reader, - SERD_BLANK, - reader->bprefix ? reader->bprefix : "", - reader->bprefix_len); + eat_byte_safe(reader, '_'); + TRY(st, eat_byte_check(reader, ':')); - int c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9]) + if (!(*dest = push_node(reader, + SERD_BLANK, + reader->bprefix ? reader->bprefix : "", + reader->bprefix_len))) { + return SERD_ERR_OVERFLOW; + } + + SerdNode* n = *dest; + int c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9]) if (is_digit(c) || c == '_') { - push_byte(reader, n, eat_byte_safe(reader, c)); + TRY(st, push_byte(reader, n, eat_byte_safe(reader, c))); } else if ((st = read_PN_CHARS(reader, n))) { return r_err(reader, st, "invalid name start\n"); } @@ -1061,7 +1082,9 @@ read_anon(SerdReader* const reader, } if (!*dest) { - *dest = blank_id(reader); + if (!(*dest = blank_id(reader))) { + return SERD_ERR_OVERFLOW; + } } SerdStatus st = SERD_SUCCESS; @@ -1075,8 +1098,12 @@ read_anon(SerdReader* const reader, if (!subject) { *ctx.flags |= SERD_ANON_CONT; } + bool ate_dot_in_list = false; - read_predicateObjectList(reader, ctx, &ate_dot_in_list); + if ((st = read_predicateObjectList(reader, ctx, &ate_dot_in_list))) { + return st; + } + if (ate_dot_in_list) { return r_err(reader, SERD_ERR_BAD_SYNTAX, "`.' inside blank\n"); } @@ -1084,8 +1111,8 @@ read_anon(SerdReader* const reader, serd_sink_write_end(reader->sink, *dest); *ctx.flags = old_flags; } - return (eat_byte_check(reader, ']') == ']') ? SERD_SUCCESS - : SERD_ERR_BAD_SYNTAX; + + return eat_byte_check(reader, ']'); } /* If emit is true: recurses, calling statement_sink for every statement @@ -1117,6 +1144,7 @@ read_object(SerdReader* const reader, return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected: ':', '<', or '_'\n"); } } + switch (c) { case EOF: case ')': @@ -1133,6 +1161,8 @@ read_object(SerdReader* const reader, ret = read_BLANK_NODE_LABEL(reader, &o, ate_dot); break; case '<': + ret = read_IRIREF(reader, &o); + break; case ':': ret = read_iri(reader, &o, ate_dot); break; @@ -1163,21 +1193,26 @@ read_object(SerdReader* const reader, return SERD_ERR_OVERFLOW; } - while (!read_PN_CHARS_BASE(reader, o)) { + while (!(ret = read_PN_CHARS_BASE(reader, o))) { + } + + if (ret > SERD_FAILURE) { + return ret; } + if ((o->length == 4 && !memcmp(serd_node_string(o), "true", 4)) || (o->length == 5 && !memcmp(serd_node_string(o), "false", 5))) { o->flags |= SERD_HAS_DATATYPE; o->type = SERD_LITERAL; - push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN); - ret = SERD_SUCCESS; - } else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) { - ret = SERD_ERR_BAD_SYNTAX; - } else { - if ((ret = read_PrefixedName(reader, o, false, ate_dot))) { - ret = ret > SERD_FAILURE ? ret : SERD_ERR_BAD_SYNTAX; - return r_err(reader, ret, "expected prefixed name\n"); + if (!(push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN))) { + ret = SERD_ERR_OVERFLOW; + } else { + ret = SERD_SUCCESS; } + } else if ((ret = read_PN_PREFIX_tail(reader, o)) > SERD_FAILURE || + (ret = read_PrefixedName(reader, o, false, ate_dot))) { + ret = (ret > SERD_FAILURE) ? ret : SERD_ERR_BAD_SYNTAX; + return r_err(reader, ret, "expected prefixed name\n"); } } @@ -1261,11 +1296,8 @@ static SerdStatus end_collection(SerdReader* const reader, ReadContext ctx, const SerdStatus st) { *ctx.flags &= ~(unsigned)SERD_LIST_CONT; - if (!st) { - return (eat_byte_check(reader, ')') == ')') ? SERD_SUCCESS - : SERD_ERR_BAD_SYNTAX; - } - return st; + + return st ? st : eat_byte_check(reader, ')'); } static SerdStatus @@ -1277,6 +1309,11 @@ read_collection(SerdReader* const reader, eat_byte_safe(reader, '('); bool end = peek_delim(reader, ')'); *dest = end ? reader->rdf_nil : blank_id(reader); + + if (!*dest) { + return SERD_ERR_OVERFLOW; + } + if (ctx.subject) { // subject predicate _:head *ctx.flags |= (end ? 0 : SERD_LIST_O_BEGIN); @@ -1315,6 +1352,7 @@ read_collection(SerdReader* const reader, used and > IDs generated by read_object above. */ if (!rest) { rest = blank_id(reader); // First pass, push + assert(rest); // Can't overflow since read_object() popped } else { set_blank_id(reader, rest, genid_size(reader)); } @@ -1343,7 +1381,7 @@ read_subject(SerdReader* const reader, bool ate_dot = false; switch ((*s_type = peek_byte(reader))) { case '[': - read_anon(reader, ctx, true, dest); + st = read_anon(reader, ctx, true, dest); break; case '(': st = read_collection(reader, ctx, dest); @@ -1365,16 +1403,18 @@ read_subject(SerdReader* const reader, static SerdStatus read_labelOrSubject(SerdReader* const reader, SerdNode** const dest) { - bool ate_dot = false; + SerdStatus st = SERD_SUCCESS; + bool ate_dot = false; + switch (peek_byte(reader)) { case '[': eat_byte_safe(reader, '['); read_ws_star(reader); - if (!eat_byte_check(reader, ']')) { - return SERD_ERR_BAD_SYNTAX; + if ((st = eat_byte_check(reader, ']'))) { + return st; } *dest = blank_id(reader); - return SERD_SUCCESS; + return *dest ? SERD_SUCCESS : SERD_ERR_OVERFLOW; case '_': return read_BLANK_NODE_LABEL(reader, dest, &ate_dot); default: @@ -1418,12 +1458,17 @@ read_base(SerdReader* const reader, const bool sparql, const bool token) SerdNode* uri = NULL; TRY(st, read_IRIREF(reader, &uri)); + + if (reader->stack.size + sizeof(SerdNode) > reader->stack.buf_size) { + return SERD_ERR_OVERFLOW; + } + serd_node_zero_pad(uri); TRY(st, serd_sink_write_base(reader->sink, uri)); read_ws_star(reader); if (!sparql) { - return eat_byte_check(reader, '.') ? SERD_SUCCESS : SERD_ERR_BAD_SYNTAX; + return eat_byte_check(reader, '.'); } if (peek_byte(reader) == '.') { @@ -1451,21 +1496,25 @@ read_prefixID(SerdReader* const reader, const bool sparql, const bool token) return st; } - if (eat_byte_check(reader, ':') != ':') { - return SERD_ERR_BAD_SYNTAX; + if ((st = eat_byte_check(reader, ':'))) { + return st; } read_ws_star(reader); SerdNode* uri = NULL; TRY(st, read_IRIREF(reader, &uri)); + if (reader->stack.size + sizeof(SerdNode) > reader->stack.buf_size) { + return SERD_ERR_OVERFLOW; + } + serd_node_zero_pad(name); serd_node_zero_pad(uri); st = serd_sink_write_prefix(reader->sink, name, uri); if (!sparql) { read_ws_star(reader); - st = eat_byte_check(reader, '.') ? SERD_SUCCESS : SERD_ERR_BAD_SYNTAX; + st = eat_byte_check(reader, '.'); } return st; } @@ -1500,8 +1549,9 @@ read_directive(SerdReader* const reader) static SerdStatus read_wrappedGraph(SerdReader* const reader, ReadContext* const ctx) { - if (!eat_byte_check(reader, '{')) { - return SERD_ERR_BAD_SYNTAX; + SerdStatus st = SERD_SUCCESS; + if ((st = eat_byte_check(reader, '{'))) { + return st; } read_ws_star(reader); @@ -1510,10 +1560,9 @@ read_wrappedGraph(SerdReader* const reader, ReadContext* const ctx) bool ate_dot = false; int s_type = 0; - ctx->subject = 0; - SerdStatus st = read_subject(reader, *ctx, &ctx->subject, &s_type); - if (st) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad subject\n"); + ctx->subject = 0; + if ((st = read_subject(reader, *ctx, &ctx->subject, &s_type))) { + return r_err(reader, st, "expected subject\n"); } if (read_triples(reader, *ctx, &ate_dot) && s_type != '[') { @@ -1616,8 +1665,7 @@ read_n3_statement(SerdReader* const reader) return st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX; } else if (!ate_dot) { read_ws_star(reader); - st = (eat_byte_check(reader, '.') == '.') ? SERD_SUCCESS - : SERD_ERR_BAD_SYNTAX; + st = eat_byte_check(reader, '.'); } break; } @@ -1696,9 +1744,7 @@ read_nquadsDoc(SerdReader* const reader) // Terminating '.' read_ws_star(reader); - if (!eat_byte_check(reader, '.')) { - return SERD_ERR_BAD_SYNTAX; - } + TRY(st, eat_byte_check(reader, '.')); } TRY(st, emit_statement(reader, ctx, ctx.object)); diff --git a/src/reader.c b/src/reader.c index 1677ed92..354ff478 100644 --- a/src/reader.c +++ b/src/reader.c @@ -124,6 +124,10 @@ emit_statement(SerdReader* const reader, graph = reader->default_graph; } + if (reader->stack.size + (2 * sizeof(SerdNode)) > reader->stack.buf_size) { + return SERD_ERR_OVERFLOW; + } + /* Zero the pad of the object node on the top of the stack. Lower nodes (subject and predicate) were already zeroed by subsequent pushes. */ serd_node_zero_pad(o); diff --git a/src/reader.h b/src/reader.h index 9f48601d..632d5257 100644 --- a/src/reader.h +++ b/src/reader.h @@ -120,26 +120,31 @@ eat_byte_safe(SerdReader* reader, const int byte) return c; } -static inline int +static inline SerdStatus eat_byte_check(SerdReader* reader, const int byte) { const int c = peek_byte(reader); if (c != byte) { - r_err(reader, SERD_ERR_BAD_SYNTAX, "expected `%c', not `%c'\n", byte, c); - return 0; + return r_err( + reader, SERD_ERR_BAD_SYNTAX, "expected `%c', not `%c'\n", byte, c); } - return eat_byte_safe(reader, byte); + + eat_byte_safe(reader, byte); + return SERD_SUCCESS; } static inline SerdStatus eat_string(SerdReader* reader, const char* str, unsigned n) { + SerdStatus st = SERD_SUCCESS; + for (unsigned i = 0; i < n; ++i) { - if (!eat_byte_check(reader, ((const uint8_t*)str)[i])) { - return SERD_ERR_BAD_SYNTAX; + if ((st = eat_byte_check(reader, ((const uint8_t*)str)[i]))) { + return st; } } - return SERD_SUCCESS; + + return st; } static inline SerdStatus diff --git a/test/meson.build b/test/meson.build index 3ec9d38b..a57e2cf2 100644 --- a/test/meson.build +++ b/test/meson.build @@ -8,6 +8,7 @@ unit_tests = [ 'env', 'free_null', 'node', + 'overflow', 'read_chunk', 'reader_writer', 'sink', diff --git a/test/test_overflow.c b/test/test_overflow.c new file mode 100644 index 00000000..13516388 --- /dev/null +++ b/test/test_overflow.c @@ -0,0 +1,163 @@ +/* + Copyright 2018 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#undef NDEBUG + +#include "serd/serd.h" + +#include <assert.h> +#include <stdio.h> + +static const size_t min_stack_size = 4 * sizeof(size_t) + 256u; +static const size_t max_stack_size = 1024u; + +static SerdStatus +test_size(const char* const str, + const SerdSyntax syntax, + const size_t stack_size) +{ + SerdSink* sink = serd_sink_new(NULL, NULL); + SerdReader* const reader = serd_reader_new(syntax, sink, stack_size); + assert(reader); + + serd_reader_start_string(reader, str); + const SerdStatus st = serd_reader_read_document(reader); + serd_reader_free(reader); + serd_sink_free(sink); + + return st; +} + +static void +test_all_sizes(const char* const str, const SerdSyntax syntax) +{ + // Ensure reading with the maximum stack size succeeds + SerdStatus st = test_size(str, syntax, max_stack_size); + assert(!st); + + // Test with an increasingly smaller stack + for (size_t size = max_stack_size; size > min_stack_size; --size) { + if ((st = test_size(str, syntax, size))) { + assert(st == SERD_ERR_OVERFLOW); + } + } + + assert(st == SERD_ERR_OVERFLOW); +} + +static void +test_ntriples_overflow(void) +{ + static const char* const test_strings[] = { + "<http://example.org/s> <http://example.org/p> <http://example.org/o> .", + NULL, + }; + + for (const char* const* t = test_strings; *t; ++t) { + test_all_sizes(*t, SERD_NTRIPLES); + } +} + +static void +test_turtle_overflow(void) +{ + static const char* const test_strings[] = { + "<http://example.org/s> <http://example.org/p> <http://example.org/> .", + "<http://example.org/s> <http://example.org/p> " + "<thisisanabsurdlylongurischeme://because/testing/> .", + "<http://example.org/s> <http://example.org/p> 1234 .", + "<http://example.org/s> <http://example.org/p> (1 2 3 4) .", + "<http://example.org/s> <http://example.org/p> ((((((((42)))))))) .", + "<http://example.org/s> <http://example.org/p> \"literal\" .", + "<http://example.org/s> <http://example.org/p> _:blank .", + "<http://example.org/s> <http://example.org/p> true .", + "<http://example.org/s> <http://example.org/p> \"\"@en .", + "(((((((((42))))))))) <http://example.org/p> <http://example.org/o> .", + "@prefix eg: <http://example.org/ns/test> .", + "@base <http://example.org/base> .", + + "@prefix eg: <http://example.org/> . \neg:s eg:p eg:o .\n", + + "@prefix ug.dot: <http://example.org/> . \nug.dot:s ug.dot:p ug.dot:o .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix øøøøøøøøø: <http://example.org/long> . \n" + "<http://example.org/somewhatlongsubjecttooffsetthepredicate> øøøøøøøøø:p " + "øøøøøøøøø:o .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "<http://example.org/subject/with/a/long/path> " + "<http://example.org/predicate/with/a/long/path> " + "<http://example.org/object/with/a/long/path> .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "<http://example.org/s> <http://example.org/p> " + "\"typed\"^^<http://example.org/Datatype> .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/ns/test> .\n" + "<http://example.org/s> <http://example.org/p> " + "\"typed\"^^eg:Datatype .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/ns/test> .\n" + "<http://example.org/s> <http://example.org/p> eg:foo .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix prefix: <http://example.org/testing/curies> .\n" + "prefix:subject prefix:predicate prefix:object .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix prefix: <http://example.org/testing/curies> .\n" + "prefix:subjectthatwillcomearoundtobeingfinishedanycharacternow " + "prefix:predicate prefix:object .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/> .\n" + "eg:s eg:p [ eg:p [ eg:p [ eg:p [ eg:p eg:o ] ] ] ] .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/> .\n" + "eg:s eg:p ( 1 2 3 ( 4 5 6 ( 7 8 9 ) ) ) .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/ns/test> .\n" + "<http://example.org/s> <http://example.org/p> eg:%99 .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix øøøøøøøøø: <http://example.org/long> .\n" + "<http://example.org/somewhatlongsubjecttooffsetthepredicate> øøøøøøøøø:p " + "øøøøøøøøø:o .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@base <http://example.org/ns/test> .\n" + "<http://example.org/s> <http://example.org/p> <rel> .", + + NULL, + }; + + for (const char* const* t = test_strings; *t; ++t) { + test_all_sizes(*t, SERD_TURTLE); + } +} + +int +main(void) +{ + test_ntriples_overflow(); + test_turtle_overflow(); + return 0; +} |