diff options
-rw-r--r-- | src/reader.c | 33 | ||||
-rw-r--r-- | tests/good/test-escapes.nt | 2 | ||||
-rw-r--r-- | tests/good/test-escapes.ttl | 2 | ||||
-rw-r--r-- | tests/good/test-uri-escape.nt | 2 | ||||
-rw-r--r-- | tests/good/test-uri-escape.ttl | 2 | ||||
-rw-r--r-- | wscript | 8 |
6 files changed, 30 insertions, 19 deletions
diff --git a/src/reader.c b/src/reader.c index 73eba93a..b74153ac 100644 --- a/src/reader.c +++ b/src/reader.c @@ -280,7 +280,7 @@ read_HEX(SerdReader* reader) } // Read UCHAR escape, initial \ is already eaten by caller -static inline bool +static inline uint32_t read_UCHAR(SerdReader* reader, Ref dest) { const uint8_t b = peek_byte(reader); @@ -293,38 +293,39 @@ read_UCHAR(SerdReader* reader, Ref dest) length = 4; break; default: - return false; + return 0; } eat_byte_safe(reader, b); uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; for (unsigned i = 0; i < length; ++i) { if (!(buf[i] = read_HEX(reader))) { - return false; + return 0; } } - uint32_t c; - sscanf((const char*)buf, "%X", &c); + uint32_t code; + sscanf((const char*)buf, "%X", &code); unsigned size = 0; - if (c < 0x00000080) { + if (code < 0x00000080) { size = 1; - } else if (c < 0x00000800) { + } else if (code < 0x00000800) { size = 2; - } else if (c < 0x00010000) { + } else if (code < 0x00010000) { size = 3; - } else if (c < 0x00110000) { + } else if (code < 0x00110000) { size = 4; } else { r_err(reader, SERD_ERR_BAD_SYNTAX, - "unicode character 0x%X out of range\n", c); + "unicode character 0x%X out of range\n", code); push_replacement(reader, dest); - return true; + return 0xFFFD; } // Build output in buf // (Note # of bytes = # of leading 1 bits in first byte) + uint32_t c = code; switch (size) { case 4: buf[3] = 0x80 | (uint8_t)(c & 0x3F); @@ -345,7 +346,7 @@ read_UCHAR(SerdReader* reader, Ref dest) for (unsigned i = 0; i < size; ++i) { push_byte(reader, dest, buf[i]); } - return true; + return code; } // Read ECHAR escape, initial \ is already eaten by caller @@ -741,7 +742,8 @@ static Ref read_IRIREF(SerdReader* reader) { TRY_RET(eat_byte_check(reader, '<')); - Ref ref = push_node(reader, SERD_URI, "", 0); + Ref ref = push_node(reader, SERD_URI, "", 0); + uint32_t code; while (true) { const uint8_t c = peek_byte(reader); switch (c) { @@ -754,9 +756,10 @@ read_IRIREF(SerdReader* reader) return ref; case '\\': eat_byte_safe(reader, c); - if (!read_UCHAR(reader, ref)) { + switch (code = read_UCHAR(reader, ref)) { + case 0: case ' ': case '<': case '>': r_err(reader, SERD_ERR_BAD_SYNTAX, - "invalid IRI character `%c'\n", c); + "invalid escaped IRI character %X %c\n", code, code); return pop_node(reader, ref); } break; diff --git a/tests/good/test-escapes.nt b/tests/good/test-escapes.nt index 725b832c..e30222e7 100644 --- a/tests/good/test-escapes.nt +++ b/tests/good/test-escapes.nt @@ -1,2 +1,2 @@ <http://example.org/thing> <http://example.org/comment> "\\\r\n\t" . -<http://example.org/thing> <http://example.org/comment> <http://example.org/\u003E> . +<http://example.org/thing> <http://example.org/comment> <http://example.org/\u005C> . diff --git a/tests/good/test-escapes.ttl b/tests/good/test-escapes.ttl index 9898e883..e30222e7 100644 --- a/tests/good/test-escapes.ttl +++ b/tests/good/test-escapes.ttl @@ -1,2 +1,2 @@ <http://example.org/thing> <http://example.org/comment> "\\\r\n\t" . -<http://example.org/thing> <http://example.org/comment> <http://example.org/\u003E> .
\ No newline at end of file +<http://example.org/thing> <http://example.org/comment> <http://example.org/\u005C> . diff --git a/tests/good/test-uri-escape.nt b/tests/good/test-uri-escape.nt index bdb27185..320e7c33 100644 --- a/tests/good/test-uri-escape.nt +++ b/tests/good/test-uri-escape.nt @@ -1 +1 @@ -<http://example.org/node> <http://example.org/prop> <scheme:\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u000B\u000C\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F\u0020!\u0022#$%&'()*+,-./0123456789:/\u003C=\u003E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005C]\u005E_\u0060abcdefghijklmnopqrstuvwxyz\u007B\u007C\u007D~\u007F> . +<http://example.org/node> <http://example.org/prop> <scheme:\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u000B\u000C\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F%20!\u0022#$%&'()*+,-./0123456789:/%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005C]\u005E_\u0060abcdefghijklmnopqrstuvwxyz\u007B\u007C\u007D~\u007F> . diff --git a/tests/good/test-uri-escape.ttl b/tests/good/test-uri-escape.ttl index bdb27185..320e7c33 100644 --- a/tests/good/test-uri-escape.ttl +++ b/tests/good/test-uri-escape.ttl @@ -1 +1 @@ -<http://example.org/node> <http://example.org/prop> <scheme:\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u000B\u000C\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F\u0020!\u0022#$%&'()*+,-./0123456789:/\u003C=\u003E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005C]\u005E_\u0060abcdefghijklmnopqrstuvwxyz\u007B\u007C\u007D~\u007F> . +<http://example.org/node> <http://example.org/prop> <scheme:\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u000B\u000C\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F%20!\u0022#$%&'()*+,-./0123456789:/%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005C]\u005E_\u0060abcdefghijklmnopqrstuvwxyz\u007B\u007C\u007D~\u007F> . @@ -348,6 +348,14 @@ def test_manifest(ctx, srcdir, testdir, report, test_base, parse_base): passed = run_test(action_node, 1) report.write(earl_assertion(test, passed, asserter)) + for i in sorted(model.triples([None, rdf.type, rdft.TestTurtleNegativeEval])): + test = i[0] + name = model.value(test, mf.name, None) + action_node = model.value(test, mf.action, None)[len(test_base):] + + passed = run_test(action_node, 1) + report.write(earl_assertion(test, passed, asserter)) + for i in sorted(model.triples([None, rdf.type, rdft.TestTurtleEval])): test = i[0] name = model.value(test, mf.name, None) |