diff options
author | David Robillard <d@drobilla.net> | 2025-01-30 15:28:14 -0500 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2025-01-30 15:51:01 -0500 |
commit | 8d87a6aa12745d01abb8ef8468b8e3f258af1996 (patch) | |
tree | d343fa877b5e041b21380830482b5d36503f75a9 | |
parent | c6cb174e7085cd2a41ed0ab74bbcbd7963e68d9d (diff) | |
download | serd-main.tar.gz serd-main.tar.bz2 serd-main.zip |
Puts all error messages that print input characters in the same place, so that
whether (and how) to print each character can be handled properly. Most notably,
this avoids printing EOF and control characters in error messages.
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | src/n3.c | 31 | ||||
-rw-r--r-- | src/reader.c | 12 | ||||
-rw-r--r-- | src/reader.h | 5 |
4 files changed, 28 insertions, 23 deletions
@@ -1,9 +1,10 @@ serd (0.32.5) unstable; urgency=medium * Fix handling of some invalid EOF cases in lax mode + * Fix invalid characters in error messages * Remove project and version number from man page OS field - -- David Robillard <d@drobilla.net> Thu, 30 Jan 2025 19:35:32 +0000 + -- David Robillard <d@drobilla.net> Thu, 30 Jan 2025 20:28:00 +0000 serd (0.32.4) stable; urgency=medium @@ -46,7 +46,7 @@ read_HEX(SerdReader* const reader) return (uint8_t)eat_byte_safe(reader, c); } - r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid hexadecimal digit '%c'\n", c); + r_err_char(reader, "hexadecimal", c); return 0; } @@ -465,11 +465,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, const Ref dest) read_utf8_code(reader, dest, &code, (uint8_t)c); if (!is_PN_CHARS_BASE(code)) { - r_err( - reader, SERD_ERR_BAD_SYNTAX, "invalid character U+%04X in name\n", code); - if (reader->strict) { - return SERD_ERR_BAD_SYNTAX; - } + st = r_err_char(reader, "name", (int)code); } return st; @@ -501,8 +497,7 @@ read_PN_CHARS(SerdReader* const reader, const Ref dest) TRY(st, read_utf8_code(reader, dest, &code, (uint8_t)c)); if (!is_PN_CHARS(code)) { - return r_err( - reader, SERD_ERR_BAD_SYNTAX, "invalid character U+%04X in name\n", code); + st = r_err_char(reader, "name", (int)code); } return st; @@ -531,7 +526,7 @@ read_PN_LOCAL_ESC(SerdReader* const reader, const Ref dest) return ((c == '!') || in_range(c, '#', '/') || (c == ';') || (c == '=') || (c == '?') || (c == '@') || (c == '_') || (c == '~')) ? 
push_byte(reader, dest, eat_byte_safe(reader, c)) - : r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n"); + : r_err(reader, SERD_ERR_BAD_SYNTAX, "bad escape\n"); } static SerdStatus @@ -631,7 +626,7 @@ read_LANGTAG(SerdReader* const reader, Ref* const dest) { int c = peek_byte(reader); if (!is_alpha(c)) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected '%c'\n", c); + return r_err_char(reader, "language", c); } *dest = push_node(reader, SERD_LITERAL, "", 0); @@ -657,7 +652,7 @@ read_IRIREF_scheme(SerdReader* const reader, const Ref dest) { int c = peek_byte(reader); if (!is_alpha(c)) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad IRI scheme start '%c'\n", c); + return r_err_char(reader, "IRI scheme start", c); } while ((c = peek_byte(reader)) > 0) { @@ -666,11 +661,7 @@ read_IRIREF_scheme(SerdReader* const reader, const Ref dest) } if (!is_uri_scheme_char(c)) { - return r_err(reader, - SERD_ERR_BAD_SYNTAX, - "bad IRI scheme char U+%04X (%c)\n", - (unsigned)c, - (char)c); + return r_err_char(reader, "IRI scheme", c); } push_byte(reader, dest, eat_byte_safe(reader, c)); @@ -704,8 +695,7 @@ read_IRIREF(SerdReader* const reader, Ref* const dest) case '"': case '<': *dest = pop_node(reader, *dest); - return r_err( - reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character '%c'\n", c); + return r_err_char(reader, "IRI", c); case '>': return SERD_SUCCESS; @@ -713,7 +703,7 @@ read_IRIREF(SerdReader* const reader, Ref* const dest) case '\\': if (read_UCHAR(reader, *dest, &code)) { *dest = pop_node(reader, *dest); - return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n"); + return r_err_char(reader, "IRI escape", c); } if (code == ' ' || code == '<' || code == '>') { @@ -731,8 +721,7 @@ read_IRIREF(SerdReader* const reader, Ref* const dest) case '|': case '}': *dest = pop_node(reader, *dest); - return r_err( - reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character '%c'\n", c); + return r_err_char(reader, "IRI", c); default: if (c <= 0) { diff --git 
a/src/reader.c b/src/reader.c index b2563c49..778913c8 100644 --- a/src/reader.c +++ b/src/reader.c @@ -30,6 +30,18 @@ r_err(SerdReader* const reader, const SerdStatus st, const char* const fmt, ...) return st; } +SerdStatus +r_err_char(SerdReader* const reader, const char* const kind, const int c) +{ + const SerdStatus st = SERD_ERR_BAD_SYNTAX; + + return (c < 0x20 || c == 0x7F || c > 0x10FFFF) + ? r_err(reader, st, "bad %s character\n", kind) + : (c == '\'' || c >= 0x80) + ? r_err(reader, st, "bad %s character U+%04X\n", kind, (uint32_t)c) + : r_err(reader, st, "bad %s character '%c'\n", kind, c); +} + void set_blank_id(SerdReader* const reader, const Ref ref, const size_t buf_size) { diff --git a/src/reader.h b/src/reader.h index d7b06a98..65c49f2c 100644 --- a/src/reader.h +++ b/src/reader.h @@ -69,6 +69,9 @@ SERD_LOG_FUNC(3, 4) SerdStatus r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...); +SerdStatus +r_err_char(SerdReader* reader, const char* kind, int c); + Ref push_node_padded(SerdReader* reader, size_t maxlen, @@ -143,7 +146,7 @@ eat_byte_check(SerdReader* const reader, const int byte) { const int c = peek_byte(reader); if (c != byte) { - r_err(reader, SERD_ERR_BAD_SYNTAX, "expected '%c', not '%c'\n", byte, c); + r_err(reader, SERD_ERR_BAD_SYNTAX, "expected '%c'\n", byte); return 0; } return eat_byte_safe(reader, byte); |