aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--NEWS3
-rw-r--r--src/n3.c31
-rw-r--r--src/reader.c12
-rw-r--r--src/reader.h5
4 files changed, 28 insertions, 23 deletions
diff --git a/NEWS b/NEWS
index fa649066..d5dce4fc 100644
--- a/NEWS
+++ b/NEWS
@@ -1,9 +1,10 @@
serd (0.32.5) unstable; urgency=medium
* Fix handling of some invalid EOF cases in lax mode
+ * Fix invalid characters in error messages
* Remove project and version number from man page OS field
- -- David Robillard <d@drobilla.net> Thu, 30 Jan 2025 19:35:32 +0000
+ -- David Robillard <d@drobilla.net> Thu, 30 Jan 2025 20:28:00 +0000
serd (0.32.4) stable; urgency=medium
diff --git a/src/n3.c b/src/n3.c
index 3d4de079..9750532b 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -46,7 +46,7 @@ read_HEX(SerdReader* const reader)
return (uint8_t)eat_byte_safe(reader, c);
}
- r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid hexadecimal digit '%c'\n", c);
+ r_err_char(reader, "hexadecimal", c);
return 0;
}
@@ -465,11 +465,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, const Ref dest)
read_utf8_code(reader, dest, &code, (uint8_t)c);
if (!is_PN_CHARS_BASE(code)) {
- r_err(
- reader, SERD_ERR_BAD_SYNTAX, "invalid character U+%04X in name\n", code);
- if (reader->strict) {
- return SERD_ERR_BAD_SYNTAX;
- }
+ st = r_err_char(reader, "name", (int)code);
}
return st;
@@ -501,8 +497,7 @@ read_PN_CHARS(SerdReader* const reader, const Ref dest)
TRY(st, read_utf8_code(reader, dest, &code, (uint8_t)c));
if (!is_PN_CHARS(code)) {
- return r_err(
- reader, SERD_ERR_BAD_SYNTAX, "invalid character U+%04X in name\n", code);
+ st = r_err_char(reader, "name", (int)code);
}
return st;
@@ -531,7 +526,7 @@ read_PN_LOCAL_ESC(SerdReader* const reader, const Ref dest)
return ((c == '!') || in_range(c, '#', '/') || (c == ';') || (c == '=') ||
(c == '?') || (c == '@') || (c == '_') || (c == '~'))
? push_byte(reader, dest, eat_byte_safe(reader, c))
- : r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n");
+ : r_err(reader, SERD_ERR_BAD_SYNTAX, "bad escape\n");
}
static SerdStatus
@@ -631,7 +626,7 @@ read_LANGTAG(SerdReader* const reader, Ref* const dest)
{
int c = peek_byte(reader);
if (!is_alpha(c)) {
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected '%c'\n", c);
+ return r_err_char(reader, "language", c);
}
*dest = push_node(reader, SERD_LITERAL, "", 0);
@@ -657,7 +652,7 @@ read_IRIREF_scheme(SerdReader* const reader, const Ref dest)
{
int c = peek_byte(reader);
if (!is_alpha(c)) {
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad IRI scheme start '%c'\n", c);
+ return r_err_char(reader, "IRI scheme start", c);
}
while ((c = peek_byte(reader)) > 0) {
@@ -666,11 +661,7 @@ read_IRIREF_scheme(SerdReader* const reader, const Ref dest)
}
if (!is_uri_scheme_char(c)) {
- return r_err(reader,
- SERD_ERR_BAD_SYNTAX,
- "bad IRI scheme char U+%04X (%c)\n",
- (unsigned)c,
- (char)c);
+ return r_err_char(reader, "IRI scheme", c);
}
push_byte(reader, dest, eat_byte_safe(reader, c));
@@ -704,8 +695,7 @@ read_IRIREF(SerdReader* const reader, Ref* const dest)
case '"':
case '<':
*dest = pop_node(reader, *dest);
- return r_err(
- reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character '%c'\n", c);
+ return r_err_char(reader, "IRI", c);
case '>':
return SERD_SUCCESS;
@@ -713,7 +703,7 @@ read_IRIREF(SerdReader* const reader, Ref* const dest)
case '\\':
if (read_UCHAR(reader, *dest, &code)) {
*dest = pop_node(reader, *dest);
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n");
+ return r_err_char(reader, "IRI escape", c);
}
if (code == ' ' || code == '<' || code == '>') {
@@ -731,8 +721,7 @@ read_IRIREF(SerdReader* const reader, Ref* const dest)
case '|':
case '}':
*dest = pop_node(reader, *dest);
- return r_err(
- reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character '%c'\n", c);
+ return r_err_char(reader, "IRI", c);
default:
if (c <= 0) {
diff --git a/src/reader.c b/src/reader.c
index b2563c49..778913c8 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -30,6 +30,18 @@ r_err(SerdReader* const reader, const SerdStatus st, const char* const fmt, ...)
return st;
}
+SerdStatus
+r_err_char(SerdReader* const reader, const char* const kind, const int c)
+{
+ const SerdStatus st = SERD_ERR_BAD_SYNTAX;
+
+ return (c < 0x20 || c == 0x7F || c > 0x10FFFF)
+ ? r_err(reader, st, "bad %s character\n", kind)
+ : (c == '\'' || c >= 0x80)
+ ? r_err(reader, st, "bad %s character U+%04X\n", kind, (uint32_t)c)
+ : r_err(reader, st, "bad %s character '%c'\n", kind, c);
+}
+
void
set_blank_id(SerdReader* const reader, const Ref ref, const size_t buf_size)
{
diff --git a/src/reader.h b/src/reader.h
index d7b06a98..65c49f2c 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -69,6 +69,9 @@ SERD_LOG_FUNC(3, 4)
SerdStatus
r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...);
+SerdStatus
+r_err_char(SerdReader* reader, const char* kind, int c);
+
Ref
push_node_padded(SerdReader* reader,
size_t maxlen,
@@ -143,7 +146,7 @@ eat_byte_check(SerdReader* const reader, const int byte)
{
const int c = peek_byte(reader);
if (c != byte) {
- r_err(reader, SERD_ERR_BAD_SYNTAX, "expected '%c', not '%c'\n", byte, c);
+ r_err(reader, SERD_ERR_BAD_SYNTAX, "expected '%c'\n", byte);
return 0;
}
return eat_byte_safe(reader, byte);