aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2017-07-09 20:43:26 +0200
committerDavid Robillard <d@drobilla.net>2017-07-10 12:06:56 +0200
commitd292cd3ac24f954069bc6ecb3d9356b20d8f6100 (patch)
tree619ff3638f4cf6de82a7279c78086fcb7dfd4c6a /src
parent58f153dcdf1c5424ed3cefb3ce59e63b30f68f27 (diff)
downloadserd-d292cd3ac24f954069bc6ecb3d9356b20d8f6100.tar.gz
serd-d292cd3ac24f954069bc6ecb3d9356b20d8f6100.tar.bz2
serd-d292cd3ac24f954069bc6ecb3d9356b20d8f6100.zip
Gracefully handle applications that write corrupt UTF-8
Diffstat (limited to 'src')
-rw-r--r--src/writer.c20
1 files changed, 13 insertions, 7 deletions
diff --git a/src/writer.c b/src/writer.c
index c293b4f8..10637aaf 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -62,6 +62,8 @@ typedef struct {
uint8_t space_after_sep; ///< Newline after sep if after sep
} SepRule;
+static const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD };
+
static const SepRule rules[] = {
{ NULL, 0, 0, 0, 0 },
{ " .\n\n", 4, 0, 0, 0 },
@@ -166,7 +168,7 @@ parse_utf8_char(SerdWriter* writer, const uint8_t* utf8, size_t* size)
case 1: case 2: case 3: case 4:
break;
default:
- return 0;
+ return *size = 0;
}
uint32_t c = utf8[0] & ((1 << (8 - *size)) - 1);
@@ -182,10 +184,8 @@ parse_utf8_char(SerdWriter* writer, const uint8_t* utf8, size_t* size)
static size_t
write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size)
{
- const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD };
- char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-
- const uint32_t c = parse_utf8_char(writer, utf8, size);
+ char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ const uint32_t c = parse_utf8_char(writer, utf8, size);
switch (*size) {
case 0:
w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]);
@@ -245,6 +245,11 @@ write_uri(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes)
size_t size = 0;
len += write_character(writer, utf8 + i, &size);
i += size;
+ if (size == 0) {
+ // Corrupt input, write replacement char and scan to next start
+ sink(replacement_char, sizeof(replacement_char), writer);
+ for (; i < n_bytes && (utf8[i] & 0x80); ++i) {}
+ }
}
return len;
}
@@ -351,9 +356,10 @@ write_text(SerdWriter* writer, TextContext ctx,
size_t size = 0;
len += write_character(writer, utf8 + i - 1, &size);
-
if (size == 0) {
- return len;
+ // Corrupt input, write replacement char and scan to next start
+ sink(replacement_char, sizeof(replacement_char), writer);
+ for (; i < n_bytes && (utf8[i] & 0x80); ++i) {}
}
i += size - 1;