diff options
-rw-r--r-- | src/n3.c | 281 | ||||
-rw-r--r-- | src/reader.c | 4 | ||||
-rw-r--r-- | src/reader.h | 19 | ||||
-rw-r--r-- | test/meson.build | 1 | ||||
-rw-r--r-- | test/test_overflow.c | 152 |
5 files changed, 320 insertions, 137 deletions
@@ -210,8 +210,8 @@ read_utf8_character(SerdReader* const reader, uint8_t bytes[4] = {0, 0, 0, 0}; SerdStatus st = read_utf8_bytes(reader, bytes, &size, c); if (st) { - push_bytes(reader, dest, replacement_char, 3); - return st; + const SerdStatus rst = push_bytes(reader, dest, replacement_char, 3); + return rst ? rst : st; } return push_bytes(reader, dest, bytes, size); @@ -227,8 +227,8 @@ read_utf8_code(SerdReader* const reader, uint8_t bytes[4] = {0, 0, 0, 0}; SerdStatus st = read_utf8_bytes(reader, bytes, &size, c); if (st) { - push_bytes(reader, dest, replacement_char, 3); - return st; + const SerdStatus rst = push_bytes(reader, dest, replacement_char, 3); + return rst ? rst : st; } if (!(st = push_bytes(reader, dest, bytes, size))) { @@ -361,8 +361,9 @@ read_STRING_LITERAL_LONG(SerdReader* const reader, st = read_string_escape(reader, ref); } else { ref->flags |= SERD_HAS_QUOTE; - push_byte(reader, ref, c); - st = read_character(reader, ref, (uint8_t)q2); + if (!(st = push_byte(reader, ref, c))) { + st = read_character(reader, ref, (uint8_t)q2); + } } } else if (c == EOF) { st = r_err(reader, SERD_BAD_SYNTAX, "end of file in long string\n"); @@ -404,7 +405,7 @@ read_STRING_LITERAL(SerdReader* const reader, } } - return eat_byte_check(reader, q) ? SERD_SUCCESS : SERD_BAD_SYNTAX; + return st ? st : eat_byte_check(reader, q); } static SerdStatus @@ -466,7 +467,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, SerdNode* const dest) } skip_byte(reader, c); - read_utf8_code(reader, dest, &code, (uint8_t)c); + TRY(st, read_utf8_code(reader, dest, &code, (uint8_t)c)); if (!is_PN_CHARS_BASE(code)) { r_err(reader, SERD_BAD_SYNTAX, "invalid character U+%04X in name\n", code); @@ -514,15 +515,22 @@ read_PN_CHARS(SerdReader* const reader, SerdNode* const dest) static SerdStatus read_PERCENT(SerdReader* const reader, SerdNode* const dest) { - push_byte(reader, dest, eat_byte_safe(reader, '%')); + SerdStatus st = push_byte(reader, dest, eat_byte_safe(reader, '%')); + if (st) { + return st; + } + const uint8_t h1 = read_HEX(reader); const uint8_t h2 = read_HEX(reader); - if (h1 && h2) { - push_byte(reader, dest, h1); - return push_byte(reader, dest, h2); + if (!h1 || !h2) { + return SERD_BAD_SYNTAX; } - return SERD_BAD_SYNTAX; + if (!(st = push_byte(reader, dest, h1))) { + st = push_byte(reader, dest, h2); + } + + return st; } static SerdStatus @@ -552,13 +560,12 @@ read_PN_LOCAL_ESC(SerdReader* const reader, SerdNode* const dest) case '@': case '_': case '~': - push_byte(reader, dest, eat_byte_safe(reader, c)); - break; + return push_byte(reader, dest, eat_byte_safe(reader, c)); default: - return r_err(reader, SERD_BAD_SYNTAX, "invalid escape\n"); + break; } - return SERD_SUCCESS; + return r_err(reader, SERD_BAD_SYNTAX, "invalid escape\n"); } static SerdStatus @@ -596,7 +603,7 @@ read_PN_LOCAL(SerdReader* const reader, case '9': case ':': case '_': - push_byte(reader, dest, eat_byte_safe(reader, c)); + st = push_byte(reader, dest, eat_byte_safe(reader, c)); break; default: if ((st = read_PLX(reader, dest)) > SERD_FAILURE) { @@ -610,9 +617,9 @@ read_PN_LOCAL(SerdReader* const reader, while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.' | ':')* if (c == '.' || c == ':') { - push_byte(reader, dest, eat_byte_safe(reader, c)); + st = push_byte(reader, dest, eat_byte_safe(reader, c)); } else if ((st = read_PLX(reader, dest)) > SERD_FAILURE) { - return r_err(reader, SERD_BAD_SYNTAX, "bad escape\n"); + return r_err(reader, st, "bad escape\n"); } else if (st != SERD_SUCCESS && (st = read_PN_CHARS(reader, dest))) { break; } @@ -644,12 +651,15 @@ read_PN_PREFIX_tail(SerdReader* const reader, SerdNode* const dest) } if (st <= SERD_FAILURE && - serd_node_string(dest)[serd_node_length(dest) - 1] == '.' && - read_PN_CHARS(reader, dest)) { - return r_err(reader, SERD_BAD_SYNTAX, "prefix ends with '.'\n"); + serd_node_string(dest)[serd_node_length(dest) - 1] == '.') { + if ((st = read_PN_CHARS(reader, dest))) { + return r_err(reader, + st > SERD_FAILURE ? st : SERD_BAD_SYNTAX, + "prefix ends with '.'\n"); + } } - return st > SERD_FAILURE ? st : SERD_SUCCESS; + return st; } static SerdStatus @@ -695,6 +705,7 @@ read_IRIREF_scheme(SerdReader* const reader, SerdNode* const dest) return r_err(reader, SERD_BAD_SYNTAX, "bad IRI scheme start '%c'\n", c); } + SerdStatus st = SERD_SUCCESS; while ((c = peek_byte(reader)) != EOF) { if (c == '>') { return r_err(reader, SERD_BAD_SYNTAX, "missing IRI scheme\n"); @@ -708,30 +719,33 @@ read_IRIREF_scheme(SerdReader* const reader, SerdNode* const dest) (char)c); } - push_byte(reader, dest, eat_byte_safe(reader, c)); + if ((st = push_byte(reader, dest, eat_byte_safe(reader, c)))) { + return st; + } + if (c == ':') { return SERD_SUCCESS; // End of scheme } } - return r_err(reader, SERD_BAD_SYNTAX, "unexpected end of file\n"); + return SERD_FAILURE; } static SerdStatus read_IRIREF(SerdReader* const reader, SerdNode** const dest) { - if (!eat_byte_check(reader, '<')) { - return SERD_BAD_SYNTAX; - } + SerdStatus st = SERD_SUCCESS; + TRY(st, eat_byte_check(reader, '<')); - *dest = push_node(reader, SERD_URI, "", 0); + if (!(*dest = push_node(reader, SERD_URI, "", 0))) { + return SERD_BAD_STACK; + } - if (!fancy_syntax(reader) && read_IRIREF_scheme(reader, *dest)) { - return r_err(reader, SERD_BAD_SYNTAX, "expected IRI scheme\n"); + if (!fancy_syntax(reader) && (st = read_IRIREF_scheme(reader, *dest))) { + return r_err(reader, st, "expected IRI scheme\n"); } - SerdStatus st = SERD_SUCCESS; - uint32_t code = 0; + uint32_t code = 0; while (st <= SERD_FAILURE) { const int c = eat_byte_safe(reader, peek_byte(reader)); switch (c) { @@ -773,10 +787,11 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest) break; } - st = SERD_FAILURE; - push_byte(reader, *dest, c); + if (!(st = push_byte(reader, *dest, c))) { + st = SERD_FAILURE; + } } else if (!(c & 0x80)) { - push_byte(reader, *dest, c); + st = push_byte(reader, *dest, c); } else if (read_utf8_character(reader, *dest, (uint8_t)c)) { if (reader->strict) { return SERD_BAD_SYNTAX; @@ -803,8 +818,7 @@ read_PrefixedName(SerdReader* const reader, return SERD_FAILURE; } - push_byte(reader, dest, eat_byte_safe(reader, ':')); - + TRY(st, push_byte(reader, dest, eat_byte_safe(reader, ':'))); TRY_FAILING(st, read_PN_LOCAL(reader, dest, ate_dot)); return SERD_SUCCESS; } @@ -822,7 +836,7 @@ read_0_9(SerdReader* const reader, SerdNode* const str, const bool at_least_one) return r_err(reader, SERD_BAD_SYNTAX, "expected digit\n"); } - return SERD_SUCCESS; + return st; } static SerdStatus @@ -844,13 +858,13 @@ read_number(SerdReader* const reader, } if (c == '-' || c == '+') { - push_byte(reader, *dest, eat_byte_safe(reader, c)); + TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c))); } if ((c = peek_byte(reader)) == '.') { has_decimal = true; - // decimal case 2 (e.g. '.0' or '-.0' or '+.0') - push_byte(reader, *dest, eat_byte_safe(reader, c)); + // decimal case 2 (e.g. ".0" or "-.0" or "+.0") + TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c))); TRY(st, read_0_9(reader, *dest, true)); } else { // all other cases ::= ( '-' | '+' ) [0-9]+ ( . )? ( [0-9]+ )? ... @@ -866,7 +880,7 @@ read_number(SerdReader* const reader, return SERD_SUCCESS; // Next byte is not a number character } - push_byte(reader, *dest, '.'); + TRY(st, push_byte(reader, *dest, '.')); read_0_9(reader, *dest, false); } } @@ -876,11 +890,11 @@ read_number(SerdReader* const reader, c = peek_byte(reader); if (c == 'e' || c == 'E') { // double - push_byte(reader, *dest, eat_byte_safe(reader, c)); + TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c))); switch ((c = peek_byte(reader))) { case '+': case '-': - push_byte(reader, *dest, eat_byte_safe(reader, c)); + TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c))); break; default: break; @@ -906,10 +920,9 @@ read_iri(SerdReader* const reader, SerdNode** const dest, bool* const ate_dot) case '<': return read_IRIREF(reader, dest); default: - if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) { - return SERD_BAD_STACK; - } - return read_PrefixedName(reader, *dest, true, ate_dot); + *dest = push_node(reader, SERD_CURIE, "", 0); + return *dest ? read_PrefixedName(reader, *dest, true, ate_dot) + : SERD_BAD_STACK; } } @@ -918,11 +931,12 @@ read_literal(SerdReader* const reader, SerdNode** const dest, bool* const ate_dot) { - *dest = push_node(reader, SERD_LITERAL, "", 0); + if (!(*dest = push_node(reader, SERD_LITERAL, "", 0))) { + return SERD_BAD_STACK; + } SerdStatus st = read_String(reader, *dest); if (st) { - *dest = NULL; return st; } @@ -931,20 +945,13 @@ read_literal(SerdReader* const reader, case '@': skip_byte(reader, '@'); (*dest)->flags |= SERD_HAS_LANGUAGE; - if ((st = read_LANGTAG(reader))) { - return r_err(reader, st, "bad literal\n"); - } + TRY(st, read_LANGTAG(reader)); break; case '^': skip_byte(reader, '^'); - if (!eat_byte_check(reader, '^')) { - return r_err(reader, SERD_BAD_SYNTAX, "expected '^'\n"); - } - + TRY(st, eat_byte_check(reader, '^')); (*dest)->flags |= SERD_HAS_DATATYPE; - if ((st = read_iri(reader, &datatype, ate_dot))) { - return r_err(reader, st, "bad datatype\n"); - } + TRY(st, read_iri(reader, &datatype, ate_dot)); break; } return SERD_SUCCESS; @@ -965,22 +972,26 @@ read_verb(SerdReader* const reader, SerdNode** const dest) return SERD_BAD_STACK; } - SerdStatus st = read_PN_PREFIX(reader, *dest); - bool ate_dot = false; - SerdNode* node = *dest; - const int next = peek_byte(reader); - if (!st && node->length == 1 && serd_node_string(node)[0] == 'a' && - next != ':' && !is_PN_CHARS_BASE((uint32_t)next)) { + SerdStatus st = read_PN_PREFIX(reader, *dest); + if (st > SERD_FAILURE) { + return st; + } + + bool ate_dot = false; + SerdNode* node = *dest; + const int next = peek_byte(reader); + if (node->length == 1 && serd_node_string(node)[0] == 'a' && next != ':' && + !is_PN_CHARS_BASE((uint32_t)next)) { serd_stack_pop_to(&reader->stack, orig_stack_size); - *dest = push_node(reader, SERD_URI, NS_RDF "type", 47); - return SERD_SUCCESS; + return ((*dest = push_node(reader, SERD_URI, NS_RDF "type", 47)) + ? SERD_SUCCESS + : SERD_BAD_STACK); } - if (st > SERD_FAILURE || - (st = read_PrefixedName(reader, *dest, false, &ate_dot)) || ate_dot) { + if ((st = read_PrefixedName(reader, *dest, false, &ate_dot)) || ate_dot) { *dest = NULL; - st = st > SERD_FAILURE ? st : SERD_BAD_SYNTAX; - return r_err(reader, st, "bad verb\n"); + return r_err( + reader, st > SERD_FAILURE ? st : SERD_BAD_SYNTAX, "expected verb\n"); } return SERD_SUCCESS; @@ -991,21 +1002,22 @@ read_BLANK_NODE_LABEL(SerdReader* const reader, SerdNode** const dest, bool* const ate_dot) { - skip_byte(reader, '_'); - if (!eat_byte_check(reader, ':')) { - return SERD_BAD_SYNTAX; - } - SerdStatus st = SERD_SUCCESS; - SerdNode* n = *dest = push_node(reader, - SERD_BLANK, - reader->bprefix ? reader->bprefix : "", - reader->bprefix_len); + skip_byte(reader, '_'); + TRY(st, eat_byte_check(reader, ':')); - int c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9]) + if (!(*dest = push_node(reader, + SERD_BLANK, + reader->bprefix ? reader->bprefix : "", + reader->bprefix_len))) { + return SERD_BAD_STACK; + } + + SerdNode* n = *dest; + int c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9]) if (is_digit(c) || c == '_') { - push_byte(reader, n, eat_byte_safe(reader, c)); + TRY(st, push_byte(reader, n, eat_byte_safe(reader, c))); } else if ((st = read_PN_CHARS(reader, n))) { st = st > SERD_FAILURE ? st : SERD_BAD_SYNTAX; return r_err(reader, st, "invalid name start\n"); @@ -1065,7 +1077,9 @@ read_anon(SerdReader* const reader, } if (!*dest) { - *dest = blank_id(reader); + if (!(*dest = blank_id(reader))) { + return SERD_BAD_STACK; + } } // Emit statement with this anonymous object first @@ -1081,8 +1095,9 @@ read_anon(SerdReader* const reader, if (!subject) { *ctx.flags |= SERD_ANON_CONT; } + bool ate_dot_in_list = false; - TRY_FAILING(st, read_predicateObjectList(reader, ctx, &ate_dot_in_list)); + TRY(st, read_predicateObjectList(reader, ctx, &ate_dot_in_list)); if (ate_dot_in_list) { return r_err(reader, SERD_BAD_SYNTAX, "'.' inside blank\n"); @@ -1092,9 +1107,7 @@ read_anon(SerdReader* const reader, *ctx.flags = old_flags; } - return st > SERD_FAILURE ? st - : (eat_byte_check(reader, ']') == ']') ? SERD_SUCCESS - : SERD_BAD_SYNTAX; + return st > SERD_FAILURE ? st : eat_byte_check(reader, ']'); } /* If emit is true: recurses, calling statement_sink for every statement @@ -1126,6 +1139,7 @@ read_object(SerdReader* const reader, return r_err(reader, SERD_BAD_SYNTAX, "expected: ':', '<', or '_'\n"); } } + switch (c) { case EOF: case ')': @@ -1142,6 +1156,8 @@ read_object(SerdReader* const reader, st = read_BLANK_NODE_LABEL(reader, &o, ate_dot); break; case '<': + st = read_IRIREF(reader, &o); + break; case ':': st = read_iri(reader, &o, ate_dot); break; @@ -1172,21 +1188,26 @@ read_object(SerdReader* const reader, return SERD_BAD_STACK; } - while (!read_PN_CHARS_BASE(reader, o)) { + while (!(st = read_PN_CHARS_BASE(reader, o))) { + } + + if (st > SERD_FAILURE) { + return st; } + if ((o->length == 4 && !memcmp(serd_node_string(o), "true", 4)) || (o->length == 5 && !memcmp(serd_node_string(o), "false", 5))) { o->flags |= SERD_HAS_DATATYPE; o->type = SERD_LITERAL; - push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN); - st = SERD_SUCCESS; - } else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) { - st = SERD_BAD_SYNTAX; - } else { - if ((st = read_PrefixedName(reader, o, false, ate_dot))) { - st = st > SERD_FAILURE ? st : SERD_BAD_SYNTAX; - return r_err(reader, st, "expected prefixed name\n"); + if (!(push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN))) { + st = SERD_BAD_STACK; + } else { + st = SERD_SUCCESS; } + } else if ((st = read_PN_PREFIX_tail(reader, o)) > SERD_FAILURE || + (st = read_PrefixedName(reader, o, false, ate_dot))) { + st = (st > SERD_FAILURE) ? st : SERD_BAD_SYNTAX; + return r_err(reader, st, "expected prefixed name\n"); } } @@ -1270,11 +1291,8 @@ static SerdStatus end_collection(SerdReader* const reader, ReadContext ctx, const SerdStatus st) { *ctx.flags &= ~(unsigned)SERD_LIST_CONT; - if (!st) { - return (eat_byte_check(reader, ')') == ')') ? SERD_SUCCESS - : SERD_BAD_SYNTAX; - } - return st; + + return st ? st : eat_byte_check(reader, ')'); } static SerdStatus @@ -1287,8 +1305,10 @@ read_collection(SerdReader* const reader, skip_byte(reader, '('); bool end = peek_delim(reader, ')'); + if (!(*dest = end ? reader->rdf_nil : blank_id(reader))) { + return SERD_BAD_STACK; + } - *dest = end ? reader->rdf_nil : blank_id(reader); if (ctx.subject) { // Reading a collection object *ctx.flags |= (end ? 0 : SERD_LIST_O_BEGIN); TRY(st, emit_statement(reader, ctx, *dest)); @@ -1327,6 +1347,7 @@ read_collection(SerdReader* const reader, used and > IDs generated by read_object above. */ if (!rest) { rest = blank_id(reader); // First pass, push + assert(rest); // Can't overflow since read_object() popped } else { set_blank_id(reader, rest, genid_size(reader)); } @@ -1377,16 +1398,16 @@ read_subject(SerdReader* const reader, static SerdStatus read_labelOrSubject(SerdReader* const reader, SerdNode** const dest) { - bool ate_dot = false; + SerdStatus st = SERD_SUCCESS; + bool ate_dot = false; + switch (peek_byte(reader)) { case '[': skip_byte(reader, '['); read_ws_star(reader); - if (!eat_byte_check(reader, ']')) { - return SERD_BAD_SYNTAX; - } + TRY(st, eat_byte_check(reader, ']')); *dest = blank_id(reader); - return SERD_SUCCESS; + return *dest ? SERD_SUCCESS : SERD_BAD_STACK; case '_': return read_BLANK_NODE_LABEL(reader, dest, &ate_dot); default: @@ -1430,12 +1451,17 @@ read_base(SerdReader* const reader, const bool sparql, const bool token) SerdNode* uri = NULL; TRY(st, read_IRIREF(reader, &uri)); + + if (reader->stack.size + sizeof(SerdNode) > reader->stack.buf_size) { + return SERD_BAD_STACK; + } + serd_node_zero_pad(uri); TRY(st, serd_sink_write_base(reader->sink, uri)); read_ws_star(reader); if (!sparql) { - return eat_byte_check(reader, '.') ? SERD_SUCCESS : SERD_BAD_SYNTAX; + return eat_byte_check(reader, '.'); } if (peek_byte(reader) == '.') { @@ -1461,21 +1487,23 @@ read_prefixID(SerdReader* const reader, const bool sparql, const bool token) TRY_FAILING(st, read_PN_PREFIX(reader, name)); - if (eat_byte_check(reader, ':') != ':') { - return SERD_BAD_SYNTAX; - } - + TRY(st, eat_byte_check(reader, ':')); read_ws_star(reader); + SerdNode* uri = NULL; TRY(st, read_IRIREF(reader, &uri)); + if (reader->stack.size + sizeof(SerdNode) > reader->stack.buf_size) { + return SERD_BAD_STACK; + } + serd_node_zero_pad(name); serd_node_zero_pad(uri); st = serd_sink_write_prefix(reader->sink, name, uri); if (!sparql) { read_ws_star(reader); - st = eat_byte_check(reader, '.') ? SERD_SUCCESS : SERD_BAD_SYNTAX; + st = eat_byte_check(reader, '.'); } return st; } @@ -1510,20 +1538,18 @@ read_directive(SerdReader* const reader) static SerdStatus read_wrappedGraph(SerdReader* const reader, ReadContext* const ctx) { - if (!eat_byte_check(reader, '{')) { - return SERD_BAD_SYNTAX; - } - + SerdStatus st = SERD_SUCCESS; + TRY(st, eat_byte_check(reader, '{')); read_ws_star(reader); + while (peek_byte(reader) != '}') { const size_t orig_stack_size = reader->stack.size; bool ate_dot = false; int s_type = 0; - ctx->subject = 0; - SerdStatus st = read_subject(reader, *ctx, &ctx->subject, &s_type); - if (st) { - return r_err(reader, SERD_BAD_SYNTAX, "bad subject\n"); + ctx->subject = 0; + if ((st = read_subject(reader, *ctx, &ctx->subject, &s_type))) { + return r_err(reader, st, "expected subject\n"); } if ((st = read_triples(reader, *ctx, &ate_dot)) && s_type != '[') { @@ -1624,8 +1650,7 @@ read_n3_statement(SerdReader* const reader) return st > SERD_FAILURE ? st : SERD_BAD_SYNTAX; } else if (!ate_dot) { read_ws_star(reader); - st = - (eat_byte_check(reader, '.') == '.') ? SERD_SUCCESS : SERD_BAD_SYNTAX; + st = eat_byte_check(reader, '.'); } break; } @@ -1708,9 +1733,7 @@ read_nquads_statement(SerdReader* const reader) // Terminating '.' read_ws_star(reader); - if (!eat_byte_check(reader, '.')) { - return SERD_BAD_SYNTAX; - } + TRY(st, eat_byte_check(reader, '.')); } TRY(st, emit_statement(reader, ctx, ctx.object)); diff --git a/src/reader.c b/src/reader.c index fc39d7fa..25c4b3b1 100644 --- a/src/reader.c +++ b/src/reader.c @@ -114,6 +114,10 @@ emit_statement(SerdReader* const reader, graph = reader->default_graph; } + if (reader->stack.size + (2 * sizeof(SerdNode)) > reader->stack.buf_size) { + return SERD_BAD_STACK; + } + /* Zero the pad of the object node on the top of the stack. Lower nodes (subject and predicate) were already zeroed by subsequent pushes. */ serd_node_zero_pad(o); diff --git a/src/reader.h b/src/reader.h index 00a41794..c562ac6c 100644 --- a/src/reader.h +++ b/src/reader.h @@ -7,6 +7,7 @@ #include "byte_source.h" #include "node.h" #include "stack.h" +#include "try.h" #include "serd/attributes.h" #include "serd/error.h" @@ -119,26 +120,28 @@ eat_byte_safe(SerdReader* reader, const int byte) return byte; } -static inline int SERD_NODISCARD +static inline SerdStatus SERD_NODISCARD eat_byte_check(SerdReader* reader, const int byte) { const int c = peek_byte(reader); if (c != byte) { - r_err(reader, SERD_BAD_SYNTAX, "expected '%c', not '%c'\n", byte, c); - return 0; + return r_err(reader, SERD_BAD_SYNTAX, "expected '%c', not '%c'\n", byte, c); } - return eat_byte_safe(reader, byte); + + skip_byte(reader, c); + return SERD_SUCCESS; } static inline SerdStatus eat_string(SerdReader* reader, const char* str, unsigned n) { + SerdStatus st = SERD_SUCCESS; + for (unsigned i = 0; i < n; ++i) { - if (!eat_byte_check(reader, str[i])) { - return SERD_BAD_SYNTAX; - } + TRY(st, eat_byte_check(reader, str[i])); } - return SERD_SUCCESS; + + return st; } static inline SerdStatus diff --git a/test/meson.build b/test/meson.build index d9a386c3..ffd65a17 100644 --- a/test/meson.build +++ b/test/meson.build @@ -122,6 +122,7 @@ unit_tests = [ 'env', 'free_null', 'node', + 'overflow', 'reader_writer', 'sink', 'string', diff --git a/test/test_overflow.c b/test/test_overflow.c new file mode 100644 index 00000000..ac4a490b --- /dev/null +++ b/test/test_overflow.c @@ -0,0 +1,152 @@ +// Copyright 2018 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#undef NDEBUG + +#include "serd/serd.h" + +#include <assert.h> +#include <stdio.h> + +static const size_t min_stack_size = 4U * sizeof(size_t) + 240U; +static const size_t max_stack_size = 1024U; + +static SerdStatus +test_size(const char* const str, + const SerdSyntax syntax, + const size_t stack_size) +{ + SerdSink* sink = serd_sink_new(NULL, NULL); + SerdReader* const reader = serd_reader_new(syntax, sink, stack_size); + if (!reader) { + return SERD_BAD_STACK; + } + + serd_reader_start_string(reader, str); + const SerdStatus st = serd_reader_read_document(reader); + serd_reader_free(reader); + serd_sink_free(sink); + + return st; +} + +static void +test_all_sizes(const char* const str, const SerdSyntax syntax) +{ + // Ensure reading with the maximum stack size succeeds + SerdStatus st = test_size(str, syntax, max_stack_size); + assert(!st); + + // Test with an increasingly smaller stack + for (size_t size = max_stack_size; size > min_stack_size; --size) { + if ((st = test_size(str, syntax, size))) { + assert(st == SERD_BAD_STACK); + } + } + + assert(st == SERD_BAD_STACK); +} + +static void +test_ntriples_overflow(void) +{ + static const char* const test_strings[] = { + "<http://example.org/s> <http://example.org/p> <http://example.org/o> .", + NULL, + }; + + for (const char* const* t = test_strings; *t; ++t) { + test_all_sizes(*t, SERD_NTRIPLES); + } +} + +static void +test_turtle_overflow(void) +{ + static const char* const test_strings[] = { + "<http://example.org/s> <http://example.org/p> <http://example.org/> .", + "<http://example.org/s> <http://example.org/p> " + "<thisisanabsurdlylongurischeme://because/testing/> .", + "<http://example.org/s> <http://example.org/p> 1234 .", + "<http://example.org/s> <http://example.org/p> (1 2 3 4) .", + "<http://example.org/s> <http://example.org/p> ((((((((42)))))))) .", + "<http://example.org/s> <http://example.org/p> \"literal\" .", + "<http://example.org/s> <http://example.org/p> _:blank .", + "<http://example.org/s> <http://example.org/p> true .", + "<http://example.org/s> <http://example.org/p> \"\"@en .", + "(((((((((42))))))))) <http://example.org/p> <http://example.org/o> .", + "@prefix eg: <http://example.org/ns/test> .", + "@base <http://example.org/base> .", + + "@prefix eg: <http://example.org/> . \neg:s eg:p eg:o .\n", + + "@prefix ug.dot: <http://example.org/> . \nug.dot:s ug.dot:p ug.dot:o .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix øøøøøøøøø: <http://example.org/long> . \n" + "<http://example.org/somewhatlongsubjecttooffsetthepredicate> øøøøøøøøø:p " + "øøøøøøøøø:o .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "<http://example.org/subject/with/a/long/path> " + "<http://example.org/predicate/with/a/long/path> " + "<http://example.org/object/with/a/long/path> .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "<http://example.org/s> <http://example.org/p> " + "\"typed\"^^<http://example.org/Datatype> .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/ns/test> .\n" + "<http://example.org/s> <http://example.org/p> " + "\"typed\"^^eg:Datatype .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/ns/test> .\n" + "<http://example.org/s> <http://example.org/p> eg:foo .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix prefix: <http://example.org/testing/curies> .\n" + "prefix:subject prefix:predicate prefix:object .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix prefix: <http://example.org/testing/curies> .\n" + "prefix:subjectthatwillcomearoundtobeingfinishedanycharacternow " + "prefix:predicate prefix:object .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/> .\n" + "eg:s eg:p [ eg:p [ eg:p [ eg:p [ eg:p eg:o ] ] ] ] .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/> .\n" + "eg:s eg:p ( 1 2 3 ( 4 5 6 ( 7 8 9 ) ) ) .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/ns/test> .\n" + "<http://example.org/s> <http://example.org/p> eg:%99 .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix øøøøøøøøø: <http://example.org/long> .\n" + "<http://example.org/somewhatlongsubjecttooffsetthepredicate> øøøøøøøøø:p " + "øøøøøøøøø:o .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@base <http://example.org/ns/test> .\n" + "<http://example.org/s> <http://example.org/p> <rel> .", + + NULL, + }; + + for (const char* const* t = test_strings; *t; ++t) { + test_all_sizes(*t, SERD_TURTLE); + } +} + +int +main(void) +{ + test_ntriples_overflow(); + test_turtle_overflow(); + return 0; +} |