From 0be7ffef641d8ac4db8ab25857f960a9d4cfb719 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Mon, 1 Mar 2021 11:57:44 -0500 Subject: WIP: Write invalid characters in URIs with percent encoding --- src/writer.c | 14 +++++++++----- test/test_reader_writer.c | 2 ++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/writer.c b/src/writer.c index 8632b0b5..38a64459 100644 --- a/src/writer.c +++ b/src/writer.c @@ -220,8 +220,8 @@ write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size) switch (*size) { case 0: serd_world_errorf( - writer->world, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]); - return sink(replacement_char, sizeof(replacement_char), writer); + writer->world, SERD_ERR_BAD_ARG, "invalid UTF-8 start: %X\n", utf8[0]); + return 0; case 1: snprintf(escape, sizeof(escape), "\\u%04X", utf8[0]); return sink(escape, 6, writer); @@ -286,8 +286,11 @@ write_uri(SerdWriter* writer, const char* utf8, size_t n_bytes) len += write_character(writer, (const uint8_t*)utf8 + i, &size); i += size; if (size == 0) { - // Corrupt input, scan to start of next character - for (++i; i < n_bytes && (utf8[i] & 0x80); ++i) { + // Corrupt input, write percent-encoded bytes and scan to next start + char escape[4] = {0, 0, 0, 0}; + for (; i < n_bytes && (utf8[i] & 0x80); ++i) { + snprintf(escape, sizeof(escape), "%%%02X", (uint8_t)utf8[i]); + len += sink(escape, 3, writer); } } } @@ -457,7 +460,8 @@ write_text(SerdWriter* writer, len += write_character(writer, (const uint8_t*)utf8 + i - 1, &size); if (size == 0) { - // Corrupt input, scan to start of next character + // Corrupt input, write replacement character and scan to the next start + len += sink(replacement_char, sizeof(replacement_char), writer); for (; i < n_bytes && (utf8[i] & 0x80); ++i) { } } else { diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c index f526a68c..8cbce04e 100644 --- a/test/test_reader_writer.c +++ b/test/test_reader_writer.c @@ -40,7 +40,9 @@ static 
size_t eof_test_read(void* buf, size_t size, size_t nmemb, void* stream) { assert(size == 1); + assert(nmemb == 1); (void)size; + (void)nmemb; static const char* const string = "_:s1 _:o1 .\n" "_:s2 _:o2 .\n"; -- cgit v1.2.1