From ea4b6e7d109ae3abc7f8ecdf99e3eb33e3484b77 Mon Sep 17 00:00:00 2001
From: David Robillard <d@drobilla.net>
Date: Sun, 5 Feb 2023 12:42:52 -0500
Subject: Strengthen handling of corrupt UTF-8 input

---
 src/read_utf8.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

(limited to 'src/read_utf8.c')

diff --git a/src/read_utf8.c b/src/read_utf8.c
index fb8ed0e2..f86bbeba 100644
--- a/src/read_utf8.c
+++ b/src/read_utf8.c
@@ -10,22 +10,11 @@
 
 #define MAX_UTF8_BYTES 4U
 
-static SerdStatus
-skip_invalid_utf8(SerdReader* const reader)
-{
-  for (int b = peek_byte(reader); b != EOF && ((uint8_t)b & 0x80);) {
-    skip_byte(reader, b);
-    b = peek_byte(reader);
-  }
-
-  return reader->strict ? SERD_BAD_SYNTAX : SERD_FAILURE;
-}
-
 static SerdStatus
 bad_char(SerdReader* const reader, const char* const fmt, const uint8_t c)
 {
   r_err(reader, SERD_BAD_SYNTAX, fmt, c);
-  return skip_invalid_utf8(reader);
+  return reader->strict ? SERD_BAD_SYNTAX : SERD_FAILURE;
 }
 
 static SerdStatus
@@ -48,7 +37,7 @@ read_utf8_continuation_bytes(SerdReader* const reader,
     }
 
     const uint8_t byte = (uint8_t)b;
-    if (!(byte & 0x80U)) {
+    if (!is_utf8_continuation(byte)) {
       return bad_char(reader, "0x%X is not a UTF-8 continuation byte", byte);
     }
 
-- 
cgit v1.2.1