aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David Robillard <d@drobilla.net> 2025-01-30 15:28:14 -0500
committer: David Robillard <d@drobilla.net> 2025-01-30 15:51:01 -0500
commit: 8d87a6aa12745d01abb8ef8468b8e3f258af1996 (patch)
tree: d343fa877b5e041b21380830482b5d36503f75a9
parent: c6cb174e7085cd2a41ed0ab74bbcbd7963e68d9d (diff)
download: serd-main.tar.gz
serd-main.tar.bz2
serd-main.zip
Unify invalid input character error reporting (HEAD, main)
Puts all error messages that print input characters in one place, so that characters which may not be printable can be handled properly. Most notably, this avoids printing EOF and control characters in error messages.
-rw-r--r-- NEWS 3
-rw-r--r-- src/n3.c 31
-rw-r--r-- src/reader.c 12
-rw-r--r-- src/reader.h 5
4 files changed, 28 insertions, 23 deletions
diff --git a/NEWS b/NEWS
index fa649066..d5dce4fc 100644
--- a/NEWS
+++ b/NEWS
@@ -1,9 +1,10 @@
serd (0.32.5) unstable; urgency=medium
* Fix handling of some invalid EOF cases in lax mode
+ * Fix invalid characters in error messages
* Remove project and version number from man page OS field
- -- David Robillard <d@drobilla.net> Thu, 30 Jan 2025 19:35:32 +0000
+ -- David Robillard <d@drobilla.net> Thu, 30 Jan 2025 20:28:00 +0000
serd (0.32.4) stable; urgency=medium
diff --git a/src/n3.c b/src/n3.c
index 3d4de079..9750532b 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -46,7 +46,7 @@ read_HEX(SerdReader* const reader)
return (uint8_t)eat_byte_safe(reader, c);
}
- r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid hexadecimal digit '%c'\n", c);
+ r_err_char(reader, "hexadecimal", c);
return 0;
}
@@ -465,11 +465,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, const Ref dest)
read_utf8_code(reader, dest, &code, (uint8_t)c);
if (!is_PN_CHARS_BASE(code)) {
- r_err(
- reader, SERD_ERR_BAD_SYNTAX, "invalid character U+%04X in name\n", code);
- if (reader->strict) {
- return SERD_ERR_BAD_SYNTAX;
- }
+ st = r_err_char(reader, "name", (int)code);
}
return st;
@@ -501,8 +497,7 @@ read_PN_CHARS(SerdReader* const reader, const Ref dest)
TRY(st, read_utf8_code(reader, dest, &code, (uint8_t)c));
if (!is_PN_CHARS(code)) {
- return r_err(
- reader, SERD_ERR_BAD_SYNTAX, "invalid character U+%04X in name\n", code);
+ st = r_err_char(reader, "name", (int)code);
}
return st;
@@ -531,7 +526,7 @@ read_PN_LOCAL_ESC(SerdReader* const reader, const Ref dest)
return ((c == '!') || in_range(c, '#', '/') || (c == ';') || (c == '=') ||
(c == '?') || (c == '@') || (c == '_') || (c == '~'))
? push_byte(reader, dest, eat_byte_safe(reader, c))
- : r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n");
+ : r_err(reader, SERD_ERR_BAD_SYNTAX, "bad escape\n");
}
static SerdStatus
@@ -631,7 +626,7 @@ read_LANGTAG(SerdReader* const reader, Ref* const dest)
{
int c = peek_byte(reader);
if (!is_alpha(c)) {
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected '%c'\n", c);
+ return r_err_char(reader, "language", c);
}
*dest = push_node(reader, SERD_LITERAL, "", 0);
@@ -657,7 +652,7 @@ read_IRIREF_scheme(SerdReader* const reader, const Ref dest)
{
int c = peek_byte(reader);
if (!is_alpha(c)) {
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad IRI scheme start '%c'\n", c);
+ return r_err_char(reader, "IRI scheme start", c);
}
while ((c = peek_byte(reader)) > 0) {
@@ -666,11 +661,7 @@ read_IRIREF_scheme(SerdReader* const reader, const Ref dest)
}
if (!is_uri_scheme_char(c)) {
- return r_err(reader,
- SERD_ERR_BAD_SYNTAX,
- "bad IRI scheme char U+%04X (%c)\n",
- (unsigned)c,
- (char)c);
+ return r_err_char(reader, "IRI scheme", c);
}
push_byte(reader, dest, eat_byte_safe(reader, c));
@@ -704,8 +695,7 @@ read_IRIREF(SerdReader* const reader, Ref* const dest)
case '"':
case '<':
*dest = pop_node(reader, *dest);
- return r_err(
- reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character '%c'\n", c);
+ return r_err_char(reader, "IRI", c);
case '>':
return SERD_SUCCESS;
@@ -713,7 +703,7 @@ read_IRIREF(SerdReader* const reader, Ref* const dest)
case '\\':
if (read_UCHAR(reader, *dest, &code)) {
*dest = pop_node(reader, *dest);
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n");
+ return r_err_char(reader, "IRI escape", c);
}
if (code == ' ' || code == '<' || code == '>') {
@@ -731,8 +721,7 @@ read_IRIREF(SerdReader* const reader, Ref* const dest)
case '|':
case '}':
*dest = pop_node(reader, *dest);
- return r_err(
- reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character '%c'\n", c);
+ return r_err_char(reader, "IRI", c);
default:
if (c <= 0) {
diff --git a/src/reader.c b/src/reader.c
index b2563c49..778913c8 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -30,6 +30,18 @@ r_err(SerdReader* const reader, const SerdStatus st, const char* const fmt, ...)
return st;
}
+SerdStatus
+r_err_char(SerdReader* const reader, const char* const kind, const int c)
+{
+ const SerdStatus st = SERD_ERR_BAD_SYNTAX;
+
+ return (c < 0x20 || c == 0x7F || c > 0x10FFFF)
+ ? r_err(reader, st, "bad %s character\n", kind)
+ : (c == '\'' || c >= 0x80)
+ ? r_err(reader, st, "bad %s character U+%04X\n", kind, (uint32_t)c)
+ : r_err(reader, st, "bad %s character '%c'\n", kind, c);
+}
+
void
set_blank_id(SerdReader* const reader, const Ref ref, const size_t buf_size)
{
diff --git a/src/reader.h b/src/reader.h
index d7b06a98..65c49f2c 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -69,6 +69,9 @@ SERD_LOG_FUNC(3, 4)
SerdStatus
r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...);
+SerdStatus
+r_err_char(SerdReader* reader, const char* kind, int c);
+
Ref
push_node_padded(SerdReader* reader,
size_t maxlen,
@@ -143,7 +146,7 @@ eat_byte_check(SerdReader* const reader, const int byte)
{
const int c = peek_byte(reader);
if (c != byte) {
- r_err(reader, SERD_ERR_BAD_SYNTAX, "expected '%c', not '%c'\n", byte, c);
+ r_err(reader, SERD_ERR_BAD_SYNTAX, "expected '%c'\n", byte);
return 0;
}
return eat_byte_safe(reader, byte);