From 09c4bb6a3031d2951ff3d285936a91a4f87dca0f Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sat, 30 Mar 2013 16:37:21 +0000 Subject: Add more tests from the new W3C Turtle test suite. Support crazy escaped NULL characters in literals. Fix incorrect round-trip serialization test command. git-svn-id: http://svn.drobilla.net/serd/trunk@446 490d8e77-9747-427b-9fa3-0b8f29cee8a0 --- src/reader.c | 28 +++++++++++++++++-------- src/serdi.c | 6 +++--- tests/tests-ttl/LITERAL1_all_controls.nt | 1 + tests/tests-ttl/LITERAL1_all_controls.ttl | Bin 0 -> 77 bytes tests/tests-ttl/LITERAL1_all_controls.ttl.thru | 0 tests/tests-ttl/LITERAL1_all_punctuation.nt | 1 + tests/tests-ttl/LITERAL1_all_punctuation.ttl | 1 + tests/tests-ttl/manifest.ttl | 16 ++++++++++++++ wscript | 2 +- 9 files changed, 42 insertions(+), 13 deletions(-) create mode 100644 tests/tests-ttl/LITERAL1_all_controls.nt create mode 100644 tests/tests-ttl/LITERAL1_all_controls.ttl create mode 100644 tests/tests-ttl/LITERAL1_all_controls.ttl.thru create mode 100644 tests/tests-ttl/LITERAL1_all_punctuation.nt create mode 100644 tests/tests-ttl/LITERAL1_all_punctuation.ttl diff --git a/src/reader.c b/src/reader.c index 6233cf30..f86bb630 100644 --- a/src/reader.c +++ b/src/reader.c @@ -280,8 +280,8 @@ read_HEX(SerdReader* reader) } // Read UCHAR escape, initial \ is already eaten by caller -static inline uint32_t -read_UCHAR(SerdReader* reader, Ref dest) +static inline bool +read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code) { const uint8_t b = peek_byte(reader); unsigned length = 0; @@ -293,14 +293,14 @@ read_UCHAR(SerdReader* reader, Ref dest) length = 4; break; default: - return 0; + return false; } eat_byte_safe(reader, b); uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; for (unsigned i = 0; i < length; ++i) { if (!(buf[i] = read_HEX(reader))) { - return 0; + return false; } } @@ -320,7 +320,8 @@ read_UCHAR(SerdReader* reader, Ref dest) r_err(reader, SERD_ERR_BAD_SYNTAX, "unicode character 0x%X 
out of range\n", code); push_replacement(reader, dest); - return 0xFFFD; + *char_code = 0xFFFD; + return true; } // Build output in buf @@ -346,7 +347,8 @@ read_UCHAR(SerdReader* reader, Ref dest) for (unsigned i = 0; i < size; ++i) { push_byte(reader, dest, buf[i]); } - return code; + *char_code = code; + return true; } // Read ECHAR escape, initial \ is already eaten by caller @@ -521,10 +523,12 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) Ref ref = push_node(reader, SERD_LITERAL, "", 0); while (true) { const uint8_t c = peek_byte(reader); + uint32_t code; switch (c) { case '\\': eat_byte_safe(reader, c); - if (!read_ECHAR(reader, ref, flags) && !read_UCHAR(reader, ref)) { + if (!read_ECHAR(reader, ref, flags) && + !read_UCHAR(reader, ref, &code)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape `\\%c'\n", peek_byte(reader)); return pop_node(reader, ref); @@ -559,13 +563,15 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) Ref ref = push_node(reader, SERD_LITERAL, "", 0); while (true) { const uint8_t c = peek_byte(reader); + uint32_t code; switch (c) { case '\n': case '\r': r_err(reader, SERD_ERR_BAD_SYNTAX, "line end in short string\n"); return pop_node(reader, ref); case '\\': eat_byte_safe(reader, c); - if (!read_ECHAR(reader, ref, flags) && !read_UCHAR(reader, ref)) { + if (!read_ECHAR(reader, ref, flags) && + !read_UCHAR(reader, ref, &code)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape `\\%c'\n", peek_byte(reader)); return pop_node(reader, ref); @@ -775,7 +781,11 @@ read_IRIREF(SerdReader* reader) return ref; case '\\': eat_byte_safe(reader, c); - switch (code = read_UCHAR(reader, ref)) { + if (!read_UCHAR(reader, ref, &code)) { + r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n"); + return pop_node(reader, ref); + } + switch (code) { case 0: case ' ': case '<': case '>': r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escaped IRI character %X %c\n", code, code); diff --git 
a/src/serdi.c b/src/serdi.c index 290ee1d9..e1de6e32 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -195,9 +195,9 @@ main(int argc, char** argv) } } - if (input_syntax != SERD_NTRIPLES // Base URI may change (@base) - || (output_syntax == SERD_TURTLE)) { - output_style |= SERD_STYLE_RESOLVED; + if (input_syntax != SERD_NTRIPLES || (output_style & SERD_STYLE_CURIED)) { + // Base URI may change and/or we're abbreviating URIs, so must resolve + output_style |= SERD_STYLE_RESOLVED; // Base may change } if (bulk_write) { diff --git a/tests/tests-ttl/LITERAL1_all_controls.nt b/tests/tests-ttl/LITERAL1_all_controls.nt new file mode 100644 index 00000000..91c8af14 --- /dev/null +++ b/tests/tests-ttl/LITERAL1_all_controls.nt @@ -0,0 +1 @@ + "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\t\u000B\u000C\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" . diff --git a/tests/tests-ttl/LITERAL1_all_controls.ttl b/tests/tests-ttl/LITERAL1_all_controls.ttl new file mode 100644 index 00000000..dbf3721c Binary files /dev/null and b/tests/tests-ttl/LITERAL1_all_controls.ttl differ diff --git a/tests/tests-ttl/LITERAL1_all_controls.ttl.thru b/tests/tests-ttl/LITERAL1_all_controls.ttl.thru new file mode 100644 index 00000000..e69de29b diff --git a/tests/tests-ttl/LITERAL1_all_punctuation.nt b/tests/tests-ttl/LITERAL1_all_punctuation.nt new file mode 100644 index 00000000..c25d818f --- /dev/null +++ b/tests/tests-ttl/LITERAL1_all_punctuation.nt @@ -0,0 +1 @@ + " !\"#$%&():;<=>?@[]^_`{|}~" . diff --git a/tests/tests-ttl/LITERAL1_all_punctuation.ttl b/tests/tests-ttl/LITERAL1_all_punctuation.ttl new file mode 100644 index 00000000..7b1d9e54 --- /dev/null +++ b/tests/tests-ttl/LITERAL1_all_punctuation.ttl @@ -0,0 +1 @@ + ' !"#$%&():;<=>?@[]^_`{|}~' . 
diff --git a/tests/tests-ttl/manifest.ttl b/tests/tests-ttl/manifest.ttl index 7b9a5f4b..d38ed41d 100644 --- a/tests/tests-ttl/manifest.ttl +++ b/tests/tests-ttl/manifest.ttl @@ -66,6 +66,8 @@ <#first> <#last> <#LITERAL1> + <#LITERAL1_all_controls> + <#LITERAL1_all_punctuation> <#LITERAL_LONG1> <#LITERAL_LONG1_with_1_squote> <#LITERAL_LONG1_with_2_squotes> @@ -565,6 +567,20 @@ mf:result ; . +<#LITERAL1_all_controls> rdf:type rdft:TestTurtleEval ; + mf:name "LITERAL1_all_controls" ; + rdfs:comment "LITERAL1_all_controls '\\x00\\x01\\x02\\x03\\x04...'" ; + mf:action ; + mf:result ; + . + +<#LITERAL1_all_punctuation> rdf:type rdft:TestTurtleEval ; + mf:name "LITERAL1_all_punctuation" ; + rdfs:comment "LITERAL1_all_punctuation '!\"#$%&()...'" ; + mf:action ; + mf:result ; + . + <#LITERAL_LONG1> rdf:type rdft:TestTurtleEval ; mf:name "LITERAL_LONG1" ; rdfs:comment "LITERAL_LONG1 '''x'''" ; diff --git a/wscript b/wscript index 3b3ed4d9..7277e336 100644 --- a/wscript +++ b/wscript @@ -286,7 +286,7 @@ def test_thru(ctx, base, path, check_filename, flags): in_filename = os.path.join(ctx.path.abspath(), path); out_filename = path + '.thru' - command = ('%s %s -i ntriples -o turtle -p foo "%s" "%s" | ' + command = ('%s %s -i turtle -o turtle -p foo "%s" "%s" | ' '%s -i turtle -o ntriples -c foo - "%s" > %s') % ( 'serdi_static', flags.ljust(5), in_filename, base, -- cgit v1.2.1