diff options
author | David Robillard <d@drobilla.net> | 2011-12-27 17:30:36 +0000 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2011-12-27 17:30:36 +0000 |
commit | e0f18e34021004a19709f0c627db51af1a27afcf (patch) | |
tree | 7d6f7750da0868b1ee47559d7ac10bdb8d73fea8 | |
parent | f4b25c02971b82cd841d3f6f7afd020b97825226 (diff) | |
download | serd-e0f18e34021004a19709f0c627db51af1a27afcf.tar.gz serd-e0f18e34021004a19709f0c627db51af1a27afcf.tar.bz2 serd-e0f18e34021004a19709f0c627db51af1a27afcf.zip |
Warn on out-of-range Unicode escapes and replace them with U+FFFD (fix #796).
git-svn-id: http://svn.drobilla.net/serd/trunk@279 490d8e77-9747-427b-9fa3-0b8f29cee8a0
-rw-r--r-- | src/reader.c | 20 | ||||
-rw-r--r-- | tests/bad-eof-in-escape.ttl | 2 | ||||
-rw-r--r-- | tests/bad-hex-escape.ttl | 1 | ||||
-rw-r--r-- | tests/test-out-of-range-unicode.out | 1 | ||||
-rw-r--r-- | tests/test-out-of-range-unicode.ttl | 1 |
5 files changed, 16 insertions, 9 deletions
diff --git a/src/reader.c b/src/reader.c index 44f03997..836aa349 100644 --- a/src/reader.c +++ b/src/reader.c @@ -223,6 +223,14 @@ push_byte(SerdReader* reader, Ref ref, const uint8_t c) } static inline void +push_replacement(SerdReader* reader, Ref dest) +{ + push_byte(reader, dest, 0xEF); + push_byte(reader, dest, 0xBF); + push_byte(reader, dest, 0xBD); +} + +static inline void append_string(SerdReader* reader, Ref ref, const uint8_t* suffix, size_t len) { #ifdef SERD_STACK_CHECK @@ -312,10 +320,12 @@ read_hex_escape(SerdReader* reader, unsigned length, Ref dest) size = 2; } else if (c < 0x00010000) { size = 3; - } else if (c < 0x00200000) { + } else if (c < 0x00110000) { size = 4; } else { - return false; + error(reader, "unicode character 0x%X out of range\n", c); + push_replacement(reader, dest); + return true; } // Build output in buf @@ -414,11 +424,7 @@ static inline SerdStatus bad_char(SerdReader* reader, Ref dest, const char* fmt, uint8_t c) { warn(reader, fmt, c); - - // Emit replacement character - push_byte(reader, dest, 0xEF); - push_byte(reader, dest, 0xBF); - push_byte(reader, dest, 0xBD); + push_replacement(reader, dest); // Skip bytes until the next start byte for (uint8_t c = peek_byte(reader); (c & 0x80);) { diff --git a/tests/bad-eof-in-escape.ttl b/tests/bad-eof-in-escape.ttl index d60acd15..d3af9c93 100644 --- a/tests/bad-eof-in-escape.ttl +++ b/tests/bad-eof-in-escape.ttl @@ -1,3 +1,3 @@ @prefix eg: <http://example.org> . -<> eg:comment """Hello""
\ No newline at end of file +<> eg:comment """\uA
\ No newline at end of file diff --git a/tests/bad-hex-escape.ttl b/tests/bad-hex-escape.ttl deleted file mode 100644 index ba6ff5b9..00000000 --- a/tests/bad-hex-escape.ttl +++ /dev/null @@ -1 +0,0 @@ -<http://example.org/thing> <http://example.org/comment> "\UFFFFFFFF" . diff --git a/tests/test-out-of-range-unicode.out b/tests/test-out-of-range-unicode.out new file mode 100644 index 00000000..5def9e31 --- /dev/null +++ b/tests/test-out-of-range-unicode.out @@ -0,0 +1 @@ +<http://example.org/thing> <http://example.org/character> "\uFFFD" . diff --git a/tests/test-out-of-range-unicode.ttl b/tests/test-out-of-range-unicode.ttl new file mode 100644 index 00000000..7e64785a --- /dev/null +++ b/tests/test-out-of-range-unicode.ttl @@ -0,0 +1 @@ +<http://example.org/thing> <http://example.org/character> "\U00110000" . |