From e0f18e34021004a19709f0c627db51af1a27afcf Mon Sep 17 00:00:00 2001
From: David Robillard
Date: Tue, 27 Dec 2011 17:30:36 +0000
Subject: Warn on invalid unicode character and replace (fix #796).

git-svn-id: http://svn.drobilla.net/serd/trunk@279 490d8e77-9747-427b-9fa3-0b8f29cee8a0
---
 src/reader.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'src/reader.c')

diff --git a/src/reader.c b/src/reader.c
index 44f03997..836aa349 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -222,6 +222,14 @@ push_byte(SerdReader* reader, Ref ref, const uint8_t c)
 	*s = '\0';
 }
 
+static inline void
+push_replacement(SerdReader* reader, Ref dest)
+{
+	push_byte(reader, dest, 0xEF);
+	push_byte(reader, dest, 0xBF);
+	push_byte(reader, dest, 0xBD);
+}
+
 static inline void
 append_string(SerdReader* reader, Ref ref, const uint8_t* suffix, size_t len)
 {
@@ -312,10 +320,12 @@ read_hex_escape(SerdReader* reader, unsigned length, Ref dest)
 		size = 2;
 	} else if (c < 0x00010000) {
 		size = 3;
-	} else if (c < 0x00200000) {
+	} else if (c < 0x00110000) {
 		size = 4;
 	} else {
-		return false;
+		error(reader, "unicode character 0x%X out of range\n", c);
+		push_replacement(reader, dest);
+		return true;
 	}
 
 	// Build output in buf
@@ -414,11 +424,7 @@ static inline SerdStatus
 bad_char(SerdReader* reader, Ref dest, const char* fmt, uint8_t c)
 {
 	warn(reader, fmt, c);
-
-	// Emit replacement character
-	push_byte(reader, dest, 0xEF);
-	push_byte(reader, dest, 0xBF);
-	push_byte(reader, dest, 0xBD);
+	push_replacement(reader, dest);
 
 	// Skip bytes until the next start byte
 	for (uint8_t c = peek_byte(reader); (c & 0x80);) {
-- 
cgit v1.2.1