aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2021-03-01 11:57:44 -0500
committer David Robillard <d@drobilla.net> 2021-03-08 23:23:05 -0500
commit 0be7ffef641d8ac4db8ab25857f960a9d4cfb719 (patch)
tree 7880d5d04c958179c91f3fcd156d67d6476fceba
parent 2824a66adfec03a242f8df8b855af2061b39c13d (diff)
download serd-0be7ffef641d8ac4db8ab25857f960a9d4cfb719.tar.gz
serd-0be7ffef641d8ac4db8ab25857f960a9d4cfb719.tar.bz2
serd-0be7ffef641d8ac4db8ab25857f960a9d4cfb719.zip
WIP: Write invalid characters in URIs with percent encoding
-rw-r--r-- src/writer.c 14
-rw-r--r-- test/test_reader_writer.c 2
2 files changed, 11 insertions, 5 deletions
diff --git a/src/writer.c b/src/writer.c
index 8632b0b5..38a64459 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -220,8 +220,8 @@ write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size)
switch (*size) {
case 0:
serd_world_errorf(
- writer->world, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]);
- return sink(replacement_char, sizeof(replacement_char), writer);
+ writer->world, SERD_ERR_BAD_ARG, "invalid UTF-8 start: %X\n", utf8[0]);
+ return 0;
case 1:
snprintf(escape, sizeof(escape), "\\u%04X", utf8[0]);
return sink(escape, 6, writer);
@@ -286,8 +286,11 @@ write_uri(SerdWriter* writer, const char* utf8, size_t n_bytes)
len += write_character(writer, (const uint8_t*)utf8 + i, &size);
i += size;
if (size == 0) {
- // Corrupt input, scan to start of next character
- for (++i; i < n_bytes && (utf8[i] & 0x80); ++i) {
+ // Corrupt input, write percent-encoded bytes and scan to next start
+ char escape[4] = {0, 0, 0, 0};
+ for (; i < n_bytes && (utf8[i] & 0x80); ++i) {
+ snprintf(escape, sizeof(escape), "%%%02X", (uint8_t)utf8[i]);
+ len += sink(escape, 3, writer);
}
}
}
@@ -457,7 +460,8 @@ write_text(SerdWriter* writer,
len += write_character(writer, (const uint8_t*)utf8 + i - 1, &size);
if (size == 0) {
- // Corrupt input, scan to start of next character
+ // Corrupt input, write replacement character and scan to the next start
+ len += sink(replacement_char, sizeof(replacement_char), writer);
for (; i < n_bytes && (utf8[i] & 0x80); ++i) {
}
} else {
diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c
index f526a68c..8cbce04e 100644
--- a/test/test_reader_writer.c
+++ b/test/test_reader_writer.c
@@ -40,7 +40,9 @@ static size_t
eof_test_read(void* buf, size_t size, size_t nmemb, void* stream)
{
assert(size == 1);
+ assert(nmemb == 1);
(void)size;
+ (void)nmemb;
static const char* const string = "_:s1 <http://example.org/p> _:o1 .\n"
"_:s2 <http://example.org/p> _:o2 .\n";