aboutsummaryrefslogtreecommitdiffstats
path: root/src/read_utf8.c
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2023-02-05 12:42:52 -0500
committer David Robillard <d@drobilla.net> 2023-12-02 18:49:08 -0500
commit ea4b6e7d109ae3abc7f8ecdf99e3eb33e3484b77 (patch)
tree 9b050faf6286c055d2fc78729eb4b56a12e3746c /src/read_utf8.c
parent d35082a57adac79703f2c9bb72da468172a209c5 (diff)
download serd-ea4b6e7d109ae3abc7f8ecdf99e3eb33e3484b77.tar.gz
serd-ea4b6e7d109ae3abc7f8ecdf99e3eb33e3484b77.tar.bz2
serd-ea4b6e7d109ae3abc7f8ecdf99e3eb33e3484b77.zip
Strengthen handling of corrupt UTF-8 input
Diffstat (limited to 'src/read_utf8.c')
-rw-r--r-- src/read_utf8.c 15
1 files changed, 2 insertions, 13 deletions
diff --git a/src/read_utf8.c b/src/read_utf8.c
index fb8ed0e2..f86bbeba 100644
--- a/src/read_utf8.c
+++ b/src/read_utf8.c
@@ -11,21 +11,10 @@
#define MAX_UTF8_BYTES 4U
static SerdStatus
-skip_invalid_utf8(SerdReader* const reader)
-{
- for (int b = peek_byte(reader); b != EOF && ((uint8_t)b & 0x80);) {
- skip_byte(reader, b);
- b = peek_byte(reader);
- }
-
- return reader->strict ? SERD_BAD_SYNTAX : SERD_FAILURE;
-}
-
-static SerdStatus
bad_char(SerdReader* const reader, const char* const fmt, const uint8_t c)
{
r_err(reader, SERD_BAD_SYNTAX, fmt, c);
- return skip_invalid_utf8(reader);
+ return reader->strict ? SERD_BAD_SYNTAX : SERD_FAILURE;
}
static SerdStatus
@@ -48,7 +37,7 @@ read_utf8_continuation_bytes(SerdReader* const reader,
}
const uint8_t byte = (uint8_t)b;
- if (!(byte & 0x80U)) {
+ if (!is_utf8_continuation(byte)) {
return bad_char(reader, "0x%X is not a UTF-8 continuation byte", byte);
}