diff options
-rw-r--r-- | include/serd/serd.h | 1 | ||||
-rw-r--r-- | src/n3.c | 48 | ||||
-rw-r--r-- | src/reader.c | 17 | ||||
-rw-r--r-- | src/reader.h | 4 | ||||
-rw-r--r-- | src/string.c | 2 | ||||
-rw-r--r-- | test/test_string.c | 2 |
6 files changed, 47 insertions, 27 deletions
diff --git a/include/serd/serd.h b/include/serd/serd.h index 77b42a24..8d00f4e5 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -205,6 +205,7 @@ typedef enum { SERD_ERR_OVERFLOW, ///< Stack overflow SERD_ERR_BAD_TEXT, ///< Invalid text encoding SERD_ERR_BAD_WRITE, ///< Error writing to file/stream + SERD_ERR_NO_DATA, ///< Unexpected end of input } SerdStatus; /** @@ -224,6 +224,11 @@ read_utf8_character(SerdReader* const reader, uint32_t size = 0; uint8_t bytes[4] = {0, 0, 0, 0}; SerdStatus st = read_utf8_bytes(reader, bytes, &size, c); + + if (!tolerate_status(reader, st)) { + return st; + } + if (st) { const SerdStatus rst = push_bytes(reader, dest, replacement_char, 3); return rst ? rst : st; @@ -343,8 +348,7 @@ read_STRING_LITERAL_LONG(SerdReader* const reader, const uint8_t q) { SerdStatus st = SERD_SUCCESS; - - while (!(st && reader->strict)) { + while (tolerate_status(reader, st)) { const int c = peek_byte(reader); if (c == '\\') { eat_byte_safe(reader, c); @@ -353,6 +357,8 @@ read_STRING_LITERAL_LONG(SerdReader* const reader, (st = read_UCHAR(reader, ref, &code))) { return r_err(reader, st, "invalid escape `\\%c'\n", peek_byte(reader)); } + } else if (c == EOF) { + st = r_err(reader, SERD_ERR_NO_DATA, "unexpected end of file\n"); } else if (c == q) { eat_byte_safe(reader, q); const int q2 = eat_byte_safe(reader, peek_byte(reader)); @@ -365,14 +371,12 @@ read_STRING_LITERAL_LONG(SerdReader* const reader, if (!(st = push_byte(reader, ref, c))) { st = read_character(reader, ref, (uint8_t)q2); } - } else if (c == EOF) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in long string\n"); } else { st = read_character(reader, ref, (uint8_t)eat_byte_safe(reader, c)); } } - return (st && reader->strict) ? st : SERD_SUCCESS; + return tolerate_status(reader, st) ? SERD_SUCCESS : st; } // STRING_LITERAL_QUOTE and STRING_LITERAL_SINGLE_QUOTE @@ -384,7 +388,7 @@ read_STRING_LITERAL(SerdReader* const reader, { SerdStatus st = SERD_SUCCESS; - while (!(st && reader->strict)) { + while (tolerate_status(reader, st)) { const int c = peek_byte(reader); uint32_t code = 0; switch (c) { @@ -403,14 +407,15 @@ read_STRING_LITERAL(SerdReader* const reader, break; default: if (c == q) { - return eat_byte_check(reader, q); + eat_byte_safe(reader, q); + return SERD_SUCCESS; } else { st = read_character(reader, ref, (uint8_t)eat_byte_safe(reader, c)); } } } - return st ? st : eat_byte_check(reader, q); + return tolerate_status(reader, st) ? SERD_SUCCESS : st; } static SerdStatus @@ -794,15 +799,13 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest) } } else if (!(c & 0x80)) { st = push_byte(reader, *dest, c); - } else if (read_utf8_character(reader, *dest, (uint8_t)c)) { - if (reader->strict) { - return SERD_ERR_BAD_SYNTAX; - } + } else { + st = read_utf8_character(reader, *dest, (uint8_t)c); } } } - return st; + return tolerate_status(reader, st) ? SERD_SUCCESS : st; } static SerdStatus @@ -1057,7 +1060,7 @@ read_BLANK_NODE_LABEL(SerdReader* const reader, } } - return SERD_SUCCESS; + return tolerate_status(reader, st) ? SERD_SUCCESS : st; } static SerdStatus @@ -1093,10 +1096,7 @@ read_anon(SerdReader* const reader, ctx.subject = *dest; if (!empty) { bool ate_dot_in_list = false; - st = read_predicateObjectList(reader, ctx, &ate_dot_in_list); - if (st > SERD_FAILURE) { - return st; - } + TRY(st, read_predicateObjectList(reader, ctx, &ate_dot_in_list)); if (ate_dot_in_list) { return r_err(reader, SERD_ERR_BAD_SYNTAX, "`.' inside blank\n"); @@ -1238,7 +1238,7 @@ read_objectList(SerdReader* const reader, ReadContext ctx, bool* const ate_dot) reader, SERD_ERR_BAD_SYNTAX, "syntax does not support abbreviation\n"); } - while (!*ate_dot && eat_delim(reader, ',')) { + while (st <= SERD_FAILURE && !*ate_dot && eat_delim(reader, ',')) { st = read_object(reader, &ctx, true, ate_dot); } @@ -1681,7 +1681,7 @@ read_turtleTrigDoc(SerdReader* const reader) const size_t orig_stack_size = reader->stack.size; const SerdStatus st = read_n3_statement(reader); if (st > SERD_FAILURE) { - if (reader->strict) { + if (!tolerate_status(reader, st)) { serd_stack_pop_to(&reader->stack, orig_stack_size); return st; } @@ -1698,7 +1698,7 @@ SerdStatus read_nquadsDoc(SerdReader* const reader) { SerdStatus st = SERD_SUCCESS; - while (!reader->source.eof) { + while (!st && !reader->source.eof) { const size_t orig_stack_size = reader->stack.size; SerdStatementFlags flags = 0; @@ -1741,12 +1741,10 @@ read_nquadsDoc(SerdReader* const reader) TRY(st, eat_byte_check(reader, '.')); } - TRY(st, emit_statement(reader, ctx, ctx.object)); - + st = emit_statement(reader, ctx, ctx.object); serd_stack_pop_to(&reader->stack, orig_stack_size); } - - return SERD_SUCCESS; + return st; } #if defined(__clang__) && __clang_major__ >= 10 diff --git a/src/reader.c b/src/reader.c index cfea1b87..661d48ae 100644 --- a/src/reader.c +++ b/src/reader.c @@ -62,6 +62,21 @@ genid_size(const SerdReader* const reader) return reader->bprefix_len + 1 + 10 + 1; // + "b" + UINT32_MAX + \0 } +bool +tolerate_status(const SerdReader* const reader, const SerdStatus status) +{ + if (status == SERD_SUCCESS || status == SERD_FAILURE) { + return true; + } + + if (status == SERD_ERR_INTERNAL || status == SERD_ERR_OVERFLOW || + status == SERD_ERR_BAD_WRITE || status == SERD_ERR_NO_DATA) { + return false; + } + + return !reader->strict; +} + SerdNode* blank_id(SerdReader* const reader) { @@ -300,7 +315,7 @@ serd_reader_prepare(SerdReader* const reader) } else if (st == SERD_FAILURE) { reader->source.eof = true; } else { - r_err(reader, st, "read error: %s\n", strerror(errno)); + r_err(reader, st, "error preparing read: %s\n", strerror(errno)); } return st; } diff --git a/src/reader.h b/src/reader.h index 252733cf..f45b69eb 100644 --- a/src/reader.h +++ b/src/reader.h @@ -82,6 +82,10 @@ SERD_PURE_FUNC size_t genid_size(const SerdReader* reader); +SERD_PURE_FUNC +bool +tolerate_status(const SerdReader* reader, SerdStatus status); + SerdNode* blank_id(SerdReader* reader); diff --git a/src/string.c b/src/string.c index 8393a3fb..097e73ef 100644 --- a/src/string.c +++ b/src/string.c @@ -56,6 +56,8 @@ serd_strerror(const SerdStatus status) return "Invalid text encoding"; case SERD_ERR_BAD_WRITE: return "Error writing to file"; + case SERD_ERR_NO_DATA: + return "Unexpected end of input"; } return "Unknown error"; diff --git a/test/test_string.c b/test/test_string.c index 2456e17a..472b464a 100644 --- a/test/test_string.c +++ b/test/test_string.c @@ -39,7 +39,7 @@ test_strerror(void) { const char* msg = serd_strerror(SERD_SUCCESS); assert(!strcmp(msg, "Success")); - for (int i = SERD_FAILURE; i <= SERD_ERR_BAD_WRITE; ++i) { + for (int i = SERD_FAILURE; i <= SERD_ERR_NO_DATA; ++i) { msg = serd_strerror((SerdStatus)i); assert(strcmp(msg, "Success")); } |