about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--src/reader.c28
-rw-r--r--src/serdi.c6
-rw-r--r--tests/tests-ttl/LITERAL1_all_controls.nt1
-rw-r--r--tests/tests-ttl/LITERAL1_all_controls.ttlbin0 -> 77 bytes
-rw-r--r--tests/tests-ttl/LITERAL1_all_controls.ttl.thru0
-rw-r--r--tests/tests-ttl/LITERAL1_all_punctuation.nt1
-rw-r--r--tests/tests-ttl/LITERAL1_all_punctuation.ttl1
-rw-r--r--tests/tests-ttl/manifest.ttl16
-rw-r--r--wscript2
9 files changed, 42 insertions, 13 deletions
diff --git a/src/reader.c b/src/reader.c
index 6233cf30..f86bb630 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -280,8 +280,8 @@ read_HEX(SerdReader* reader)
}
// Read UCHAR escape, initial \ is already eaten by caller
-static inline uint32_t
-read_UCHAR(SerdReader* reader, Ref dest)
+static inline bool
+read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code)
{
const uint8_t b = peek_byte(reader);
unsigned length = 0;
@@ -293,14 +293,14 @@ read_UCHAR(SerdReader* reader, Ref dest)
length = 4;
break;
default:
- return 0;
+ return false;
}
eat_byte_safe(reader, b);
uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
for (unsigned i = 0; i < length; ++i) {
if (!(buf[i] = read_HEX(reader))) {
- return 0;
+ return false;
}
}
@@ -320,7 +320,8 @@ read_UCHAR(SerdReader* reader, Ref dest)
r_err(reader, SERD_ERR_BAD_SYNTAX,
"unicode character 0x%X out of range\n", code);
push_replacement(reader, dest);
- return 0xFFFD;
+ *char_code = 0xFFFD;
+ return true;
}
// Build output in buf
@@ -346,7 +347,8 @@ read_UCHAR(SerdReader* reader, Ref dest)
for (unsigned i = 0; i < size; ++i) {
push_byte(reader, dest, buf[i]);
}
- return code;
+ *char_code = code;
+ return true;
}
// Read ECHAR escape, initial \ is already eaten by caller
@@ -521,10 +523,12 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
Ref ref = push_node(reader, SERD_LITERAL, "", 0);
while (true) {
const uint8_t c = peek_byte(reader);
+ uint32_t code;
switch (c) {
case '\\':
eat_byte_safe(reader, c);
- if (!read_ECHAR(reader, ref, flags) && !read_UCHAR(reader, ref)) {
+ if (!read_ECHAR(reader, ref, flags) &&
+ !read_UCHAR(reader, ref, &code)) {
r_err(reader, SERD_ERR_BAD_SYNTAX,
"invalid escape `\\%c'\n", peek_byte(reader));
return pop_node(reader, ref);
@@ -559,13 +563,15 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
Ref ref = push_node(reader, SERD_LITERAL, "", 0);
while (true) {
const uint8_t c = peek_byte(reader);
+ uint32_t code;
switch (c) {
case '\n': case '\r':
r_err(reader, SERD_ERR_BAD_SYNTAX, "line end in short string\n");
return pop_node(reader, ref);
case '\\':
eat_byte_safe(reader, c);
- if (!read_ECHAR(reader, ref, flags) && !read_UCHAR(reader, ref)) {
+ if (!read_ECHAR(reader, ref, flags) &&
+ !read_UCHAR(reader, ref, &code)) {
r_err(reader, SERD_ERR_BAD_SYNTAX,
"invalid escape `\\%c'\n", peek_byte(reader));
return pop_node(reader, ref);
@@ -775,7 +781,11 @@ read_IRIREF(SerdReader* reader)
return ref;
case '\\':
eat_byte_safe(reader, c);
- switch (code = read_UCHAR(reader, ref)) {
+ if (!read_UCHAR(reader, ref, &code)) {
+ r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n");
+ return pop_node(reader, ref);
+ }
+ switch (code) {
case 0: case ' ': case '<': case '>':
r_err(reader, SERD_ERR_BAD_SYNTAX,
"invalid escaped IRI character %X %c\n", code, code);
diff --git a/src/serdi.c b/src/serdi.c
index 290ee1d9..e1de6e32 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -195,9 +195,9 @@ main(int argc, char** argv)
}
}
- if (input_syntax != SERD_NTRIPLES // Base URI may change (@base)
- || (output_syntax == SERD_TURTLE)) {
- output_style |= SERD_STYLE_RESOLVED;
+ if (input_syntax != SERD_NTRIPLES || (output_style & SERD_STYLE_CURIED)) {
+ // Base URI may change and/or we're abbreviating URIs, so must resolve
+ output_style |= SERD_STYLE_RESOLVED; // Base may change
}
if (bulk_write) {
diff --git a/tests/tests-ttl/LITERAL1_all_controls.nt b/tests/tests-ttl/LITERAL1_all_controls.nt
new file mode 100644
index 00000000..91c8af14
--- /dev/null
+++ b/tests/tests-ttl/LITERAL1_all_controls.nt
@@ -0,0 +1 @@
+<http://a.example/s> <http://a.example/p> "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\t\u000B\u000C\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" .
diff --git a/tests/tests-ttl/LITERAL1_all_controls.ttl b/tests/tests-ttl/LITERAL1_all_controls.ttl
new file mode 100644
index 00000000..dbf3721c
--- /dev/null
+++ b/tests/tests-ttl/LITERAL1_all_controls.ttl
Binary files differ
diff --git a/tests/tests-ttl/LITERAL1_all_controls.ttl.thru b/tests/tests-ttl/LITERAL1_all_controls.ttl.thru
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/tests/tests-ttl/LITERAL1_all_controls.ttl.thru
diff --git a/tests/tests-ttl/LITERAL1_all_punctuation.nt b/tests/tests-ttl/LITERAL1_all_punctuation.nt
new file mode 100644
index 00000000..c25d818f
--- /dev/null
+++ b/tests/tests-ttl/LITERAL1_all_punctuation.nt
@@ -0,0 +1 @@
+<http://a.example/s> <http://a.example/p> " !\"#$%&():;<=>?@[]^_`{|}~" .
diff --git a/tests/tests-ttl/LITERAL1_all_punctuation.ttl b/tests/tests-ttl/LITERAL1_all_punctuation.ttl
new file mode 100644
index 00000000..7b1d9e54
--- /dev/null
+++ b/tests/tests-ttl/LITERAL1_all_punctuation.ttl
@@ -0,0 +1 @@
+<http://a.example/s> <http://a.example/p> ' !"#$%&():;<=>?@[]^_`{|}~' .
diff --git a/tests/tests-ttl/manifest.ttl b/tests/tests-ttl/manifest.ttl
index 7b9a5f4b..d38ed41d 100644
--- a/tests/tests-ttl/manifest.ttl
+++ b/tests/tests-ttl/manifest.ttl
@@ -66,6 +66,8 @@
<#first>
<#last>
<#LITERAL1>
+ <#LITERAL1_all_controls>
+ <#LITERAL1_all_punctuation>
<#LITERAL_LONG1>
<#LITERAL_LONG1_with_1_squote>
<#LITERAL_LONG1_with_2_squotes>
@@ -565,6 +567,20 @@
mf:result <LITERAL1.nt> ;
.
+<#LITERAL1_all_controls> rdf:type rdft:TestTurtleEval ;
+ mf:name "LITERAL1_all_controls" ;
+ rdfs:comment "LITERAL1_all_controls '\\x00\\x01\\x02\\x03\\x04...'" ;
+ mf:action <LITERAL1_all_controls.ttl> ;
+ mf:result <LITERAL1_all_controls.nt> ;
+ .
+
+<#LITERAL1_all_punctuation> rdf:type rdft:TestTurtleEval ;
+ mf:name "LITERAL1_all_punctuation" ;
+ rdfs:comment "LITERAL1_all_punctuation '!\"#$%&()...'" ;
+ mf:action <LITERAL1_all_punctuation.ttl> ;
+ mf:result <LITERAL1_all_punctuation.nt> ;
+ .
+
<#LITERAL_LONG1> rdf:type rdft:TestTurtleEval ;
mf:name "LITERAL_LONG1" ;
rdfs:comment "LITERAL_LONG1 '''x'''" ;
diff --git a/wscript b/wscript
index 3b3ed4d9..7277e336 100644
--- a/wscript
+++ b/wscript
@@ -286,7 +286,7 @@ def test_thru(ctx, base, path, check_filename, flags):
in_filename = os.path.join(ctx.path.abspath(), path);
out_filename = path + '.thru'
- command = ('%s %s -i ntriples -o turtle -p foo "%s" "%s" | '
+ command = ('%s %s -i turtle -o turtle -p foo "%s" "%s" | '
'%s -i turtle -o ntriples -c foo - "%s" > %s') % (
'serdi_static', flags.ljust(5),
in_filename, base,