diff options
author | David Robillard <d@drobilla.net> | 2018-11-25 16:12:23 +0100 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2019-10-27 22:40:23 +0100 |
commit | bbe5313c31386366175222cde3055b63848ebb4c (patch) | |
tree | b5d92f8d7e0b889aadd3528c7e94a99e0a7faa89 | |
parent | 433f17a017c3471afe6b09cb74929d0ce76822c4 (diff) | |
download | serd-bbe5313c31386366175222cde3055b63848ebb4c.tar.gz serd-bbe5313c31386366175222cde3055b63848ebb4c.tar.bz2 serd-bbe5313c31386366175222cde3055b63848ebb4c.zip |
Fix lax handling of string errors and add separate lax test suite
-rw-r--r-- | NEWS | 6 | ||||
-rw-r--r-- | src/n3.c | 23 | ||||
-rw-r--r-- | tests/good/manifest.ttl | 7 | ||||
-rw-r--r-- | tests/good/test-bad-utf8.ttl | 3 | ||||
-rw-r--r-- | tests/lax/manifest.ttl | 25 | ||||
-rw-r--r-- | tests/lax/test-bad-uri.nt | 3 | ||||
-rw-r--r-- | tests/lax/test-bad-uri.ttl | 4 | ||||
-rw-r--r-- | tests/lax/test-bad-utf8.nt (renamed from tests/good/test-bad-utf8.nt) | 3 | ||||
-rw-r--r-- | tests/lax/test-bad-utf8.ttl | 6 | ||||
-rw-r--r-- | wscript | 14 |
10 files changed, 67 insertions, 27 deletions
@@ -1,3 +1,9 @@ +serd (0.30.3) unstable; + + * Fix lax handling of string errors + + -- David Robillard <d@drobilla.net> Sun, 27 Oct 2019 21:38:43 +0000 + serd (0.30.2) stable; * Fix GCC 4 build @@ -302,8 +302,9 @@ eat_delim(SerdReader* reader, const char delim) static Ref read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) { - Ref ref = push_node(reader, SERD_LITERAL, "", 0); - while (!reader->status) { + Ref ref = push_node(reader, SERD_LITERAL, "", 0); + SerdStatus st = SERD_SUCCESS; + while (!reader->status && !(st && reader->strict)) { const uint8_t c = peek_byte(reader); if (c == '\\') { eat_byte_safe(reader, c); @@ -326,7 +327,7 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) push_byte(reader, ref, c); read_character(reader, ref, flags, q2); } else { - read_character(reader, ref, flags, eat_byte_safe(reader, c)); + st = read_character(reader, ref, flags, eat_byte_safe(reader, c)); } } return ref; @@ -337,8 +338,9 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) static Ref read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) { - Ref ref = push_node(reader, SERD_LITERAL, "", 0); - while (!reader->status) { + Ref ref = push_node(reader, SERD_LITERAL, "", 0); + SerdStatus st = SERD_SUCCESS; + while (!reader->status && !(st && reader->strict)) { const uint8_t c = peek_byte(reader); uint32_t code = 0; switch (c) { @@ -359,7 +361,7 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) eat_byte_check(reader, q); return ref; } else { - read_character(reader, ref, flags, eat_byte_safe(reader, c)); + st = read_character(reader, ref, flags, eat_byte_safe(reader, c)); } } } @@ -615,13 +617,14 @@ static Ref read_IRIREF(SerdReader* reader) { TRY_RET(eat_byte_check(reader, '<')); - Ref ref = push_node(reader, SERD_URI, "", 0); + Ref ref = push_node(reader, SERD_URI, "", 0); + SerdStatus st = SERD_SUCCESS; if (!fancy_syntax(reader) && !read_IRIREF_scheme(reader, ref)) { return pop_node(reader, ref); } uint32_t code = 0; - while (!reader->status) { + while (!reader->status && !(st && reader->strict)) { const uint8_t c = eat_byte_safe(reader, peek_byte(reader)); switch (c) { case '"': case '<': case '^': case '`': case '{': case '|': case '}': @@ -660,11 +663,11 @@ read_IRIREF(SerdReader* reader) push_byte(reader, ref, c); } else if (!(c & 0x80)) { push_byte(reader, ref, c); - } else if (read_utf8_character(reader, ref, c)) { + } else if ((st = read_utf8_character(reader, ref, c))) { if (reader->strict) { + reader->status = SERD_FAILURE; return pop_node(reader, ref); } - reader->status = SERD_FAILURE; } } } diff --git a/tests/good/manifest.ttl b/tests/good/manifest.ttl index dd83a7e5..9dccde7b 100644 --- a/tests/good/manifest.ttl +++ b/tests/good/manifest.ttl @@ -18,7 +18,6 @@ <#test-30> <#test-a-without-whitespace> <#test-backspace> - <#test-bad-utf8> <#test-base-query> <#test-blank-cont> <#test-blank-in-list> @@ -109,12 +108,6 @@ mf:action <test-backspace.ttl> ; mf:result <test-backspace.nt> . -<#test-bad-utf8> - rdf:type rdft:TestTurtleEval ; - mf:name "test-bad-utf8" ; - mf:action <test-bad-utf8.ttl> ; - mf:result <test-bad-utf8.nt> . - <#test-base-query> rdf:type rdft:TestTurtleEval ; mf:name "test-base-query" ; diff --git a/tests/good/test-bad-utf8.ttl b/tests/good/test-bad-utf8.ttl deleted file mode 100644 index 2c105f5b..00000000 --- a/tests/good/test-bad-utf8.ttl +++ /dev/null @@ -1,3 +0,0 @@ -<http://example.org/thing> <http://example.org/comment> "Impossible bytes: þ ÿ" . -<http://example.org/thing> <http://example.org/comment> "2 continuation bytes: €¿" . -<http://example.org/thing> <http://example.org/comment> "Missing continuation: À" . diff --git a/tests/lax/manifest.ttl b/tests/lax/manifest.ttl new file mode 100644 index 00000000..e3b11ff4 --- /dev/null +++ b/tests/lax/manifest.ttl @@ -0,0 +1,25 @@ +@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix rdft: <http://www.w3.org/ns/rdftest#> . + +<> + rdf:type mf:Manifest ; + rdfs:comment "Serd lax syntax test cases" ; + mf:entries ( + <#test-bad-uri> + <#test-bad-utf8> + ) . + +<#test-bad-uri> + rdf:type rdft:TestTurtleNegativeSyntax ; + mf:name "test-bad-uri" ; + mf:action <test-bad-uri.ttl> ; + mf:result <test-bad-uri.nt> . + +<#test-bad-utf8> + rdf:type rdft:TestTurtleNegativeSyntax ; + mf:name "test-bad-utf8" ; + mf:action <test-bad-utf8.ttl> ; + mf:result <test-bad-utf8.nt> . + diff --git a/tests/lax/test-bad-uri.nt b/tests/lax/test-bad-uri.nt new file mode 100644 index 00000000..7458e12a --- /dev/null +++ b/tests/lax/test-bad-uri.nt @@ -0,0 +1,3 @@ +<http://example.org/s> <http://example.org/p> <http://example.org/goodo1> . +<http://example.org/s> <http://example.org/p> <http://example.org/\uFFFDbado2> . +<http://example.org/s> <http://example.org/p> <http://example.org/goodo2> . diff --git a/tests/lax/test-bad-uri.ttl b/tests/lax/test-bad-uri.ttl new file mode 100644 index 00000000..8f11b1d7 --- /dev/null +++ b/tests/lax/test-bad-uri.ttl @@ -0,0 +1,4 @@ +<http://example.org/s> <http://example.org/p> <http://example.org/ bado1> . +<http://example.org/s> <http://example.org/p> <http://example.org/goodo1> . +<http://example.org/s> <http://example.org/p> <http://example.org/ÿÿbado2> . +<http://example.org/s> <http://example.org/p> <http://example.org/goodo2> . diff --git a/tests/good/test-bad-utf8.nt b/tests/lax/test-bad-utf8.nt index 6d73993d..09200e9d 100644 --- a/tests/good/test-bad-utf8.nt +++ b/tests/lax/test-bad-utf8.nt @@ -1,3 +1,6 @@ <http://example.org/thing> <http://example.org/comment> "Impossible bytes: \uFFFD \uFFFD" . <http://example.org/thing> <http://example.org/comment> "2 continuation bytes: \uFFFD" . <http://example.org/thing> <http://example.org/comment> "Missing continuation: \uFFFD" . +<http://example.org/thing> <http://example.org/comment> "Impossible bytes: \uFFFD \uFFFD" . +<http://example.org/thing> <http://example.org/comment> "2 continuation bytes: \uFFFD" . +<http://example.org/thing> <http://example.org/comment> "Missing continuation: \uFFFD" . diff --git a/tests/lax/test-bad-utf8.ttl b/tests/lax/test-bad-utf8.ttl new file mode 100644 index 00000000..526a171f --- /dev/null +++ b/tests/lax/test-bad-utf8.ttl @@ -0,0 +1,6 @@ +<http://example.org/thing> <http://example.org/comment> "Impossible bytes: þ ÿ" . +<http://example.org/thing> <http://example.org/comment> "2 continuation bytes: €¿" . +<http://example.org/thing> <http://example.org/comment> "Missing continuation: À" . +<http://example.org/thing> <http://example.org/comment> """Impossible bytes: þ ÿ""" . +<http://example.org/thing> <http://example.org/comment> """2 continuation bytes: €¿""" . +<http://example.org/thing> <http://example.org/comment> """Missing continuation: À""" . @@ -357,7 +357,8 @@ def test_suite(ctx, base_uri, testdir, report, isyntax, options=[]): expected=expected_return, name=action) - if result and ((mf + 'result') in model[test]): + if (result and expected_return == 0 and + ((mf + 'result') in model[test])): # Check output against test suite check_uri = model[test][mf + 'result'][0] check_path = ctx.src_path(file_uri_to_path(check_uri)) @@ -373,21 +374,18 @@ def test_suite(ctx, base_uri, testdir, report, isyntax, options=[]): if report is not None: report.write(earl_assertion(test, result, asserter)) - # Run lax test - check([command[0]] + ['-l'] + command[1:], - expected=None, name=action + ' lax') - ns_rdftest = 'http://www.w3.org/ns/rdftest#' for test_class, instances in instances.items(): if test_class.startswith(ns_rdftest): - expected = 1 if 'Negative' in test_class else 0 + expected = 1 if '-l' not in options and 'Negative' in test_class else 0 run_tests(test_class, instances, expected) def test(tst): import tempfile # Create test output directories - for i in ['bad', 'good', 'TurtleTests', 'NTriplesTests', 'NQuadsTests', 'TriGTests']: + for i in ['bad', 'good', 'lax', + 'TurtleTests', 'NTriplesTests', 'NQuadsTests', 'TriGTests']: try: test_dir = os.path.join('tests', i) os.makedirs(test_dir) @@ -472,6 +470,8 @@ def test(tst): serd_base = 'http://drobilla.net/sw/serd/tests/' test_suite(tst, serd_base + 'good/', 'good', None, 'Turtle') test_suite(tst, serd_base + 'bad/', 'bad', None, 'Turtle') + test_suite(tst, serd_base + 'lax/', 'lax', None, 'Turtle', ['-l']) + test_suite(tst, serd_base + 'lax/', 'lax', None, 'Turtle') # Standard test suites with open('earl.ttl', 'w') as report: |