From 9c29938c172e2423f67925274a18b4f1c1bb42cf Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sun, 9 Jul 2017 20:09:36 +0200 Subject: Factor out UTF-8 character size counting --- src/reader.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'src/reader.c') diff --git a/src/reader.c b/src/reader.c index dd7fa8b2..91e0e920 100644 --- a/src/reader.c +++ b/src/reader.c @@ -409,14 +409,8 @@ bad_char(SerdReader* reader, Ref dest, const char* fmt, uint8_t c) static SerdStatus read_utf8_character(SerdReader* reader, Ref dest, uint8_t c) { - unsigned size = 1; - if ((c & 0xE0) == 0xC0) { // Starts with `110' - size = 2; - } else if ((c & 0xF0) == 0xE0) { // Starts with `1110' - size = 3; - } else if ((c & 0xF8) == 0xF0) { // Starts with `11110' - size = 4; - } else { + const uint32_t size = utf8_num_bytes(c); + if (size <= 1 || size > 4) { return bad_char(reader, dest, "invalid UTF-8 start 0x%X\n", c); } -- cgit v1.2.1