aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2021-07-08 22:50:02 -0400
committer David Robillard <d@drobilla.net> 2022-01-13 23:04:25 -0500
commit 51f1de0235cfb091fcc481fa97d48326295d2c58 (patch)
tree eb4114352fce89faeb239de9644d4f92f6e1db86
parent 86178f4cac2259cd2c26ef27488c3ea36a947c7d (diff)
download serd-51f1de0235cfb091fcc481fa97d48326295d2c58.tar.gz
serd-51f1de0235cfb091fcc481fa97d48326295d2c58.tar.bz2
serd-51f1de0235cfb091fcc481fa97d48326295d2c58.zip
Handle read errors more precisely
-rw-r--r-- include/serd/serd.h 1
-rw-r--r-- src/n3.c 48
-rw-r--r-- src/reader.c 17
-rw-r--r-- src/reader.h 4
-rw-r--r-- src/string.c 2
-rw-r--r-- test/test_string.c 2
6 files changed, 47 insertions, 27 deletions
diff --git a/include/serd/serd.h b/include/serd/serd.h
index 77b42a24..8d00f4e5 100644
--- a/include/serd/serd.h
+++ b/include/serd/serd.h
@@ -205,6 +205,7 @@ typedef enum {
SERD_ERR_OVERFLOW, ///< Stack overflow
SERD_ERR_BAD_TEXT, ///< Invalid text encoding
SERD_ERR_BAD_WRITE, ///< Error writing to file/stream
+ SERD_ERR_NO_DATA, ///< Unexpected end of input
} SerdStatus;
/**
diff --git a/src/n3.c b/src/n3.c
index be4afb7d..ac46dc61 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -224,6 +224,11 @@ read_utf8_character(SerdReader* const reader,
uint32_t size = 0;
uint8_t bytes[4] = {0, 0, 0, 0};
SerdStatus st = read_utf8_bytes(reader, bytes, &size, c);
+
+ if (!tolerate_status(reader, st)) {
+ return st;
+ }
+
if (st) {
const SerdStatus rst = push_bytes(reader, dest, replacement_char, 3);
return rst ? rst : st;
@@ -343,8 +348,7 @@ read_STRING_LITERAL_LONG(SerdReader* const reader,
const uint8_t q)
{
SerdStatus st = SERD_SUCCESS;
-
- while (!(st && reader->strict)) {
+ while (tolerate_status(reader, st)) {
const int c = peek_byte(reader);
if (c == '\\') {
eat_byte_safe(reader, c);
@@ -353,6 +357,8 @@ read_STRING_LITERAL_LONG(SerdReader* const reader,
(st = read_UCHAR(reader, ref, &code))) {
return r_err(reader, st, "invalid escape `\\%c'\n", peek_byte(reader));
}
+ } else if (c == EOF) {
+ st = r_err(reader, SERD_ERR_NO_DATA, "unexpected end of file\n");
} else if (c == q) {
eat_byte_safe(reader, q);
const int q2 = eat_byte_safe(reader, peek_byte(reader));
@@ -365,14 +371,12 @@ read_STRING_LITERAL_LONG(SerdReader* const reader,
if (!(st = push_byte(reader, ref, c))) {
st = read_character(reader, ref, (uint8_t)q2);
}
- } else if (c == EOF) {
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in long string\n");
} else {
st = read_character(reader, ref, (uint8_t)eat_byte_safe(reader, c));
}
}
- return (st && reader->strict) ? st : SERD_SUCCESS;
+ return tolerate_status(reader, st) ? SERD_SUCCESS : st;
}
// STRING_LITERAL_QUOTE and STRING_LITERAL_SINGLE_QUOTE
@@ -384,7 +388,7 @@ read_STRING_LITERAL(SerdReader* const reader,
{
SerdStatus st = SERD_SUCCESS;
- while (!(st && reader->strict)) {
+ while (tolerate_status(reader, st)) {
const int c = peek_byte(reader);
uint32_t code = 0;
switch (c) {
@@ -403,14 +407,15 @@ read_STRING_LITERAL(SerdReader* const reader,
break;
default:
if (c == q) {
- return eat_byte_check(reader, q);
+ eat_byte_safe(reader, q);
+ return SERD_SUCCESS;
} else {
st = read_character(reader, ref, (uint8_t)eat_byte_safe(reader, c));
}
}
}
- return st ? st : eat_byte_check(reader, q);
+ return tolerate_status(reader, st) ? SERD_SUCCESS : st;
}
static SerdStatus
@@ -794,15 +799,13 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest)
}
} else if (!(c & 0x80)) {
st = push_byte(reader, *dest, c);
- } else if (read_utf8_character(reader, *dest, (uint8_t)c)) {
- if (reader->strict) {
- return SERD_ERR_BAD_SYNTAX;
- }
+ } else {
+ st = read_utf8_character(reader, *dest, (uint8_t)c);
}
}
}
- return st;
+ return tolerate_status(reader, st) ? SERD_SUCCESS : st;
}
static SerdStatus
@@ -1057,7 +1060,7 @@ read_BLANK_NODE_LABEL(SerdReader* const reader,
}
}
- return SERD_SUCCESS;
+ return tolerate_status(reader, st) ? SERD_SUCCESS : st;
}
static SerdStatus
@@ -1093,10 +1096,7 @@ read_anon(SerdReader* const reader,
ctx.subject = *dest;
if (!empty) {
bool ate_dot_in_list = false;
- st = read_predicateObjectList(reader, ctx, &ate_dot_in_list);
- if (st > SERD_FAILURE) {
- return st;
- }
+ TRY(st, read_predicateObjectList(reader, ctx, &ate_dot_in_list));
if (ate_dot_in_list) {
return r_err(reader, SERD_ERR_BAD_SYNTAX, "`.' inside blank\n");
@@ -1238,7 +1238,7 @@ read_objectList(SerdReader* const reader, ReadContext ctx, bool* const ate_dot)
reader, SERD_ERR_BAD_SYNTAX, "syntax does not support abbreviation\n");
}
- while (!*ate_dot && eat_delim(reader, ',')) {
+ while (st <= SERD_FAILURE && !*ate_dot && eat_delim(reader, ',')) {
st = read_object(reader, &ctx, true, ate_dot);
}
@@ -1681,7 +1681,7 @@ read_turtleTrigDoc(SerdReader* const reader)
const size_t orig_stack_size = reader->stack.size;
const SerdStatus st = read_n3_statement(reader);
if (st > SERD_FAILURE) {
- if (reader->strict) {
+ if (!tolerate_status(reader, st)) {
serd_stack_pop_to(&reader->stack, orig_stack_size);
return st;
}
@@ -1698,7 +1698,7 @@ SerdStatus
read_nquadsDoc(SerdReader* const reader)
{
SerdStatus st = SERD_SUCCESS;
- while (!reader->source.eof) {
+ while (!st && !reader->source.eof) {
const size_t orig_stack_size = reader->stack.size;
SerdStatementFlags flags = 0;
@@ -1741,12 +1741,10 @@ read_nquadsDoc(SerdReader* const reader)
TRY(st, eat_byte_check(reader, '.'));
}
- TRY(st, emit_statement(reader, ctx, ctx.object));
-
+ st = emit_statement(reader, ctx, ctx.object);
serd_stack_pop_to(&reader->stack, orig_stack_size);
}
-
- return SERD_SUCCESS;
+ return st;
}
#if defined(__clang__) && __clang_major__ >= 10
diff --git a/src/reader.c b/src/reader.c
index cfea1b87..661d48ae 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -62,6 +62,21 @@ genid_size(const SerdReader* const reader)
return reader->bprefix_len + 1 + 10 + 1; // + "b" + UINT32_MAX + \0
}
+bool
+tolerate_status(const SerdReader* const reader, const SerdStatus status)
+{
+ if (status == SERD_SUCCESS || status == SERD_FAILURE) {
+ return true;
+ }
+
+ if (status == SERD_ERR_INTERNAL || status == SERD_ERR_OVERFLOW ||
+ status == SERD_ERR_BAD_WRITE || status == SERD_ERR_NO_DATA) {
+ return false;
+ }
+
+ return !reader->strict;
+}
+
SerdNode*
blank_id(SerdReader* const reader)
{
@@ -300,7 +315,7 @@ serd_reader_prepare(SerdReader* const reader)
} else if (st == SERD_FAILURE) {
reader->source.eof = true;
} else {
- r_err(reader, st, "read error: %s\n", strerror(errno));
+ r_err(reader, st, "error preparing read: %s\n", strerror(errno));
}
return st;
}
diff --git a/src/reader.h b/src/reader.h
index 252733cf..f45b69eb 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -82,6 +82,10 @@ SERD_PURE_FUNC
size_t
genid_size(const SerdReader* reader);
+SERD_PURE_FUNC
+bool
+tolerate_status(const SerdReader* reader, SerdStatus status);
+
SerdNode*
blank_id(SerdReader* reader);
diff --git a/src/string.c b/src/string.c
index 8393a3fb..097e73ef 100644
--- a/src/string.c
+++ b/src/string.c
@@ -56,6 +56,8 @@ serd_strerror(const SerdStatus status)
return "Invalid text encoding";
case SERD_ERR_BAD_WRITE:
return "Error writing to file";
+ case SERD_ERR_NO_DATA:
+ return "Unexpected end of input";
}
return "Unknown error";
diff --git a/test/test_string.c b/test/test_string.c
index 2456e17a..472b464a 100644
--- a/test/test_string.c
+++ b/test/test_string.c
@@ -39,7 +39,7 @@ test_strerror(void)
{
const char* msg = serd_strerror(SERD_SUCCESS);
assert(!strcmp(msg, "Success"));
- for (int i = SERD_FAILURE; i <= SERD_ERR_BAD_WRITE; ++i) {
+ for (int i = SERD_FAILURE; i <= SERD_ERR_NO_DATA; ++i) {
msg = serd_strerror((SerdStatus)i);
assert(strcmp(msg, "Success"));
}