about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: David Robillard <d@drobilla.net> 2011-12-23 23:23:19 +0000
committer: David Robillard <d@drobilla.net> 2011-12-23 23:23:19 +0000
commit: 4f1cec5fd409fc7ba98ca0bb19af10a492a6af2a (patch)
tree: 40de7868ba42b62f41a0f4e261496f4d3695c4e9
parent: 4b512a68903953e1372c3ad3b982eed72a7db681 (diff)
download: serd-4f1cec5fd409fc7ba98ca0bb19af10a492a6af2a.tar.gz
serd-4f1cec5fd409fc7ba98ca0bb19af10a492a6af2a.tar.bz2
serd-4f1cec5fd409fc7ba98ca0bb19af10a492a6af2a.zip
Escape ASCII control characters in output (e.g. fix problems with string
literals that start with a backspace).

git-svn-id: http://svn.drobilla.net/serd/trunk@263 490d8e77-9747-427b-9fa3-0b8f29cee8a0
-rw-r--r-- ChangeLog 2
-rw-r--r-- src/writer.c 17
-rw-r--r-- tests/test-backspace.out 1
-rw-r--r-- tests/test-backspace.ttl 1
4 files changed, 13 insertions, 8 deletions
diff --git a/ChangeLog b/ChangeLog
index 72177335..a3fab995 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -16,6 +16,8 @@ serd (UNRELEASED) unstable; urgency=low
* Add serd_strtod(), serd_node_new_decimal(), and serd_node_new_integer()
for locale-independent numeric node parsing/serialising.
* Add serd_file_sink for the common case of writing to a FILE* stream.
+ * Escape ASCII control characters in output (e.g. fix problems with string
+ literals that start with a backspace)
-- David Robillard <d@drobilla.net> (UNRELEASED)
diff --git a/src/writer.c b/src/writer.c
index b9d80ce7..96f8d1e4 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -101,8 +101,7 @@ write_text(SerdWriter* writer, TextContext ctx,
size_t j = i;
for (; j < n_bytes; ++j) {
if (utf8[j] == terminator || utf8[j] == '\\' || utf8[j] == '"'
- || (((writer->style & SERD_STYLE_ASCII) || ctx == WRITE_URI)
- && !in_range(utf8[j], 0x20, 0x7E))) {
+ || (!in_range(utf8[j], 0x20, 0x7E))) {
break;
}
}
@@ -146,10 +145,13 @@ write_text(SerdWriter* writer, TextContext ctx,
if ((in & 0x80) == 0) { // Starts with `0'
size = 1;
c = in & 0x7F;
- if (in_range(in, 0x20, 0x7E)) { // Printable ASCII
- sink(&in, 1, writer);
- continue;
+ if (in_range(c, 0x20, 0x7E) || (ctx != WRITE_URI && is_space(c))) {
+ sink(&in, 1, writer); // Print ASCII character
+ } else {
+ snprintf(escape, 7, "\\u%04X", c);
+ sink(escape, 6, writer); // Escape ASCII control character
}
+ continue;
} else if ((in & 0xE0) == 0xC0) { // Starts with `110'
size = 2;
c = in & 0x1F;
@@ -164,10 +166,9 @@ write_text(SerdWriter* writer, TextContext ctx,
return false;
}
- if ((ctx == WRITE_STRING || ctx == WRITE_LONG_STRING)
- && !(writer->style & SERD_STYLE_ASCII)) {
+ if (ctx != WRITE_URI && !(writer->style & SERD_STYLE_ASCII)) {
// Write UTF-8 character directly to UTF-8 output
- // TODO: Scan to next escape and write entire range at once
+ // TODO: Always parse and validate character?
sink(utf8 + i - 1, size, writer);
i += size - 1;
continue;
diff --git a/tests/test-backspace.out b/tests/test-backspace.out
new file mode 100644
index 00000000..ae8d3678
--- /dev/null
+++ b/tests/test-backspace.out
@@ -0,0 +1 @@
+<http://example.org/thing> <http://example.org/label> "\u0008" .
diff --git a/tests/test-backspace.ttl b/tests/test-backspace.ttl
new file mode 100644
index 00000000..0e695a88
--- /dev/null
+++ b/tests/test-backspace.ttl
@@ -0,0 +1 @@
+<http://example.org/thing> <http://example.org/label> "\u0008" .
\ No newline at end of file