about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/reader.c 20
-rw-r--r-- tests/bad-eof-in-escape.ttl 2
-rw-r--r-- tests/bad-hex-escape.ttl 1
-rw-r--r-- tests/test-out-of-range-unicode.out 1
-rw-r--r-- tests/test-out-of-range-unicode.ttl 1
5 files changed, 16 insertions, 9 deletions
diff --git a/src/reader.c b/src/reader.c
index 44f03997..836aa349 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -223,6 +223,14 @@ push_byte(SerdReader* reader, Ref ref, const uint8_t c)
}
static inline void
+push_replacement(SerdReader* reader, Ref dest)
+{
+ push_byte(reader, dest, 0xEF);
+ push_byte(reader, dest, 0xBF);
+ push_byte(reader, dest, 0xBD);
+}
+
+static inline void
append_string(SerdReader* reader, Ref ref, const uint8_t* suffix, size_t len)
{
#ifdef SERD_STACK_CHECK
@@ -312,10 +320,12 @@ read_hex_escape(SerdReader* reader, unsigned length, Ref dest)
size = 2;
} else if (c < 0x00010000) {
size = 3;
- } else if (c < 0x00200000) {
+ } else if (c < 0x00110000) {
size = 4;
} else {
- return false;
+ error(reader, "unicode character 0x%X out of range\n", c);
+ push_replacement(reader, dest);
+ return true;
}
// Build output in buf
@@ -414,11 +424,7 @@ static inline SerdStatus
bad_char(SerdReader* reader, Ref dest, const char* fmt, uint8_t c)
{
warn(reader, fmt, c);
-
- // Emit replacement character
- push_byte(reader, dest, 0xEF);
- push_byte(reader, dest, 0xBF);
- push_byte(reader, dest, 0xBD);
+ push_replacement(reader, dest);
// Skip bytes until the next start byte
for (uint8_t c = peek_byte(reader); (c & 0x80);) {
diff --git a/tests/bad-eof-in-escape.ttl b/tests/bad-eof-in-escape.ttl
index d60acd15..d3af9c93 100644
--- a/tests/bad-eof-in-escape.ttl
+++ b/tests/bad-eof-in-escape.ttl
@@ -1,3 +1,3 @@
@prefix eg: <http://example.org> .
-<> eg:comment """Hello"" \ No newline at end of file
+<> eg:comment """\uA \ No newline at end of file
diff --git a/tests/bad-hex-escape.ttl b/tests/bad-hex-escape.ttl
deleted file mode 100644
index ba6ff5b9..00000000
--- a/tests/bad-hex-escape.ttl
+++ /dev/null
@@ -1 +0,0 @@
-<http://example.org/thing> <http://example.org/comment> "\UFFFFFFFF" .
diff --git a/tests/test-out-of-range-unicode.out b/tests/test-out-of-range-unicode.out
new file mode 100644
index 00000000..5def9e31
--- /dev/null
+++ b/tests/test-out-of-range-unicode.out
@@ -0,0 +1 @@
+<http://example.org/thing> <http://example.org/character> "\uFFFD" .
diff --git a/tests/test-out-of-range-unicode.ttl b/tests/test-out-of-range-unicode.ttl
new file mode 100644
index 00000000..7e64785a
--- /dev/null
+++ b/tests/test-out-of-range-unicode.ttl
@@ -0,0 +1 @@
+<http://example.org/thing> <http://example.org/character> "\U00110000" .