From 42ac3e7a0516b4c5d3b13a4cb842ab3f3d368101 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sat, 15 Aug 2020 20:22:26 +0200 Subject: Handle read errors more precisely --- include/serd/serd.h | 3 ++- src/n3.c | 44 ++++++++++++++++++-------------------------- src/reader.c | 2 +- src/string.c | 2 ++ test/test_string.c | 2 +- 5 files changed, 24 insertions(+), 29 deletions(-) diff --git a/include/serd/serd.h b/include/serd/serd.h index f5ee9022..80e2d3d5 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -119,7 +119,8 @@ typedef enum { SERD_ERR_ID_CLASH, ///< Encountered clashing blank node IDs SERD_ERR_BAD_CURIE, ///< Invalid CURIE (e.g. prefix does not exist) SERD_ERR_INTERNAL, ///< Unexpected internal error (should not happen) - SERD_ERR_OVERFLOW ///< Stack overflow + SERD_ERR_OVERFLOW, ///< Stack overflow + SERD_ERR_NO_DATA ///< Unexpected end of input } SerdStatus; /// RDF syntax type diff --git a/src/n3.c b/src/n3.c index 97757c64..aac4489d 100644 --- a/src/n3.c +++ b/src/n3.c @@ -214,11 +214,14 @@ read_utf8_character(SerdReader* reader, SerdNode* dest, uint8_t c) uint32_t size = 0; uint8_t bytes[4] = {0, 0, 0, 0}; SerdStatus st = read_utf8_bytes(reader, bytes, &size, c); - if (st) { - push_bytes(reader, dest, replacement_char, 3); + if (st && reader->strict) { return st; } + if (st) { + return push_bytes(reader, dest, replacement_char, 3); + } + return push_bytes(reader, dest, bytes, size); } @@ -229,8 +232,7 @@ read_utf8_code(SerdReader* reader, SerdNode* dest, uint32_t* code, uint8_t c) uint8_t bytes[4] = {0, 0, 0, 0}; SerdStatus st = read_utf8_bytes(reader, bytes, &size, c); if (st) { - push_bytes(reader, dest, replacement_char, 3); - return st; + return reader->strict ? 
st : push_bytes(reader, dest, replacement_char, 3); } if (!(st = push_bytes(reader, dest, bytes, size))) { @@ -328,8 +330,7 @@ static SerdStatus read_STRING_LITERAL_LONG(SerdReader* reader, SerdNode* ref, uint8_t q) { SerdStatus st = SERD_SUCCESS; - - while (!(st && reader->strict)) { + while (!st || (st == SERD_ERR_BAD_SYNTAX && !reader->strict)) { const int c = peek_byte(reader); if (c == '\\') { eat_byte_safe(reader, c); @@ -338,6 +339,8 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNode* ref, uint8_t q) (st = read_UCHAR(reader, ref, &code))) { return r_err(reader, st, "invalid escape `\\%c'\n", peek_byte(reader)); } + } else if (c == EOF) { + st = r_err(reader, SERD_ERR_NO_DATA, "unexpected end of file\n"); } else if (c == q) { eat_byte_safe(reader, q); const int q2 = eat_byte_safe(reader, peek_byte(reader)); @@ -349,8 +352,6 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNode* ref, uint8_t q) ref->flags |= SERD_HAS_QUOTE; push_byte(reader, ref, c); st = read_character(reader, ref, (uint8_t)q2); - } else if (c == EOF) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in long string\n"); } else { st = read_character(reader, ref, (uint8_t)eat_byte_safe(reader, c)); } @@ -365,8 +366,7 @@ static SerdStatus read_STRING_LITERAL(SerdReader* reader, SerdNode* ref, uint8_t q) { SerdStatus st = SERD_SUCCESS; - - while (!(st && reader->strict)) { + while (!st || (st == SERD_ERR_BAD_SYNTAX && !reader->strict)) { const int c = peek_byte(reader); uint32_t code = 0; switch (c) { @@ -385,7 +385,7 @@ read_STRING_LITERAL(SerdReader* reader, SerdNode* ref, uint8_t q) break; default: if (c == q) { - eat_byte_check(reader, q); + eat_byte_safe(reader, q); return SERD_SUCCESS; } else { st = read_character(reader, ref, (uint8_t)eat_byte_safe(reader, c)); @@ -393,9 +393,7 @@ read_STRING_LITERAL(SerdReader* reader, SerdNode* ref, uint8_t q) } } - return st ? st - : eat_byte_check(reader, q) ? 
SERD_SUCCESS - : SERD_ERR_BAD_SYNTAX; + return st; } static SerdStatus @@ -765,9 +763,7 @@ read_IRIREF(SerdReader* reader, SerdNode** dest) } else if (!(c & 0x80)) { push_byte(reader, *dest, c); } else if (read_utf8_character(reader, *dest, (uint8_t)c)) { - if (reader->strict) { - return SERD_ERR_BAD_SYNTAX; - } + st = reader->strict ? SERD_ERR_BAD_SYNTAX : SERD_FAILURE; } } } @@ -1210,7 +1206,7 @@ read_objectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) reader, SERD_ERR_BAD_SYNTAX, "syntax does not support abbreviation\n"); } - while (!*ate_dot && eat_delim(reader, ',')) { + while (st <= SERD_FAILURE && !*ate_dot && eat_delim(reader, ',')) { st = read_object(reader, &ctx, true, ate_dot); } @@ -1634,16 +1630,14 @@ read_turtleTrigDoc(SerdReader* reader) const size_t orig_stack_size = reader->stack.size; const SerdStatus st = read_n3_statement(reader); if (st > SERD_FAILURE) { - if (reader->strict) { + if (reader->strict || reader->source.eof || st == SERD_ERR_OVERFLOW) { serd_stack_pop_to(&reader->stack, orig_stack_size); return st; } - skip_until(reader, '\n'); } serd_stack_pop_to(&reader->stack, orig_stack_size); } - return SERD_SUCCESS; } @@ -1651,7 +1645,7 @@ SerdStatus read_nquadsDoc(SerdReader* reader) { SerdStatus st = SERD_SUCCESS; - while (!reader->source.eof) { + while (!st && !reader->source.eof) { const size_t orig_stack_size = reader->stack.size; SerdStatementFlags flags = 0; @@ -1696,10 +1690,8 @@ read_nquadsDoc(SerdReader* reader) } } - TRY(st, emit_statement(reader, ctx, ctx.object)); - + st = emit_statement(reader, ctx, ctx.object); serd_stack_pop_to(&reader->stack, orig_stack_size); } - return SERD_SUCCESS; + return st; } diff --git a/src/reader.c b/src/reader.c index 8b687864..5daec8f8 100644 --- a/src/reader.c +++ b/src/reader.c @@ -286,7 +286,7 @@ serd_reader_prepare(SerdReader* reader) } else if (st == SERD_FAILURE) { reader->source.eof = true; } else { - r_err(reader, st, "read error: %s\n", strerror(errno)); + r_err(reader, st, 
"error preparing read: %s\n", strerror(errno)); } return st; } diff --git a/src/string.c b/src/string.c index f59a5fd6..e3d20b15 100644 --- a/src/string.c +++ b/src/string.c @@ -56,6 +56,8 @@ serd_strerror(SerdStatus status) return "Internal error"; case SERD_ERR_OVERFLOW: return "Stack overflow"; + case SERD_ERR_NO_DATA: + return "Unexpected end of input"; default: break; } diff --git a/test/test_string.c b/test/test_string.c index a3fb9247..472b464a 100644 --- a/test/test_string.c +++ b/test/test_string.c @@ -39,7 +39,7 @@ test_strerror(void) { const char* msg = serd_strerror(SERD_SUCCESS); assert(!strcmp(msg, "Success")); - for (int i = SERD_FAILURE; i <= SERD_ERR_OVERFLOW; ++i) { + for (int i = SERD_FAILURE; i <= SERD_ERR_NO_DATA; ++i) { msg = serd_strerror((SerdStatus)i); assert(strcmp(msg, "Success")); } -- cgit v1.2.1