From 4f1cec5fd409fc7ba98ca0bb19af10a492a6af2a Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 23 Dec 2011 23:23:19 +0000 Subject: Escape ASCII control characters in output (e.g. fix problems with string literals that start with a backspace). git-svn-id: http://svn.drobilla.net/serd/trunk@263 490d8e77-9747-427b-9fa3-0b8f29cee8a0 --- ChangeLog | 2 ++ src/writer.c | 17 +++++++++-------- tests/test-backspace.out | 1 + tests/test-backspace.ttl | 1 + 4 files changed, 13 insertions(+), 8 deletions(-) create mode 100644 tests/test-backspace.out create mode 100644 tests/test-backspace.ttl diff --git a/ChangeLog b/ChangeLog index 72177335..a3fab995 100644 --- a/ChangeLog +++ b/ChangeLog @@ -16,6 +16,8 @@ serd (UNRELEASED) unstable; urgency=low * Add serd_strtod(), serd_node_new_decimal(), and serd_node_new_integer() for locale-independent numeric node parsing/serialising. * Add serd_file_sink for the common case of writing to a FILE* stream. + * Escape ASCII control characters in output (e.g. fix problems with string + literals that start with a backspace) -- David Robillard (UNRELEASED) diff --git a/src/writer.c b/src/writer.c index b9d80ce7..96f8d1e4 100644 --- a/src/writer.c +++ b/src/writer.c @@ -101,8 +101,7 @@ write_text(SerdWriter* writer, TextContext ctx, size_t j = i; for (; j < n_bytes; ++j) { if (utf8[j] == terminator || utf8[j] == '\\' || utf8[j] == '"' - || (((writer->style & SERD_STYLE_ASCII) || ctx == WRITE_URI) - && !in_range(utf8[j], 0x20, 0x7E))) { + || (!in_range(utf8[j], 0x20, 0x7E))) { break; } } @@ -146,10 +145,13 @@ write_text(SerdWriter* writer, TextContext ctx, if ((in & 0x80) == 0) { // Starts with `0' size = 1; c = in & 0x7F; - if (in_range(in, 0x20, 0x7E)) { // Printable ASCII - sink(&in, 1, writer); - continue; + if (in_range(c, 0x20, 0x7E) || (ctx != WRITE_URI && is_space(c))) { + sink(&in, 1, writer); // Print ASCII character + } else { + snprintf(escape, 7, "\\u%04X", c); + sink(escape, 6, writer); // Escape ASCII control character } + continue; } else if ((in & 0xE0) == 0xC0) { // Starts with `110' size = 2; c = in & 0x1F; @@ -164,10 +166,9 @@ write_text(SerdWriter* writer, TextContext ctx, return false; } - if ((ctx == WRITE_STRING || ctx == WRITE_LONG_STRING) - && !(writer->style & SERD_STYLE_ASCII)) { + if (ctx != WRITE_URI && !(writer->style & SERD_STYLE_ASCII)) { // Write UTF-8 character directly to UTF-8 output - // TODO: Scan to next escape and write entire range at once + // TODO: Always parse and validate character? sink(utf8 + i - 1, size, writer); i += size - 1; continue; diff --git a/tests/test-backspace.out b/tests/test-backspace.out new file mode 100644 index 00000000..ae8d3678 --- /dev/null +++ b/tests/test-backspace.out @@ -0,0 +1 @@ + "\u0008" . diff --git a/tests/test-backspace.ttl b/tests/test-backspace.ttl new file mode 100644 index 00000000..0e695a88 --- /dev/null +++ b/tests/test-backspace.ttl @@ -0,0 +1 @@ + "\u0008" . \ No newline at end of file -- cgit v1.2.1