From bbe5313c31386366175222cde3055b63848ebb4c Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sun, 25 Nov 2018 16:12:23 +0100 Subject: Fix lax handling of string errors and add separate lax test suite --- NEWS | 6 ++++++ src/n3.c | 23 +++++++++++++---------- tests/good/manifest.ttl | 7 ------- tests/good/test-bad-utf8.nt | 3 --- tests/good/test-bad-utf8.ttl | 3 --- tests/lax/manifest.ttl | 25 +++++++++++++++++++++++++ tests/lax/test-bad-uri.nt | 3 +++ tests/lax/test-bad-uri.ttl | 4 ++++ tests/lax/test-bad-utf8.nt | 6 ++++++ tests/lax/test-bad-utf8.ttl | 6 ++++++ wscript | 14 +++++++------- 11 files changed, 70 insertions(+), 30 deletions(-) delete mode 100644 tests/good/test-bad-utf8.nt delete mode 100644 tests/good/test-bad-utf8.ttl create mode 100644 tests/lax/manifest.ttl create mode 100644 tests/lax/test-bad-uri.nt create mode 100644 tests/lax/test-bad-uri.ttl create mode 100644 tests/lax/test-bad-utf8.nt create mode 100644 tests/lax/test-bad-utf8.ttl diff --git a/NEWS b/NEWS index 4209951b..4a27904f 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,9 @@ +serd (0.30.3) unstable; + + * Fix lax handling of string errors + + -- David Robillard Sun, 27 Oct 2019 21:38:43 +0000 + serd (0.30.2) stable; * Fix GCC 4 build diff --git a/src/n3.c b/src/n3.c index 1a9bfaeb..8b61b93c 100644 --- a/src/n3.c +++ b/src/n3.c @@ -302,8 +302,9 @@ eat_delim(SerdReader* reader, const char delim) static Ref read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) { - Ref ref = push_node(reader, SERD_LITERAL, "", 0); - while (!reader->status) { + Ref ref = push_node(reader, SERD_LITERAL, "", 0); + SerdStatus st = SERD_SUCCESS; + while (!reader->status && !(st && reader->strict)) { const uint8_t c = peek_byte(reader); if (c == '\\') { eat_byte_safe(reader, c); @@ -326,7 +327,7 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) push_byte(reader, ref, c); read_character(reader, ref, flags, q2); } else { - read_character(reader, ref, flags, eat_byte_safe(reader, c)); + st = read_character(reader, ref, flags, eat_byte_safe(reader, c)); } } return ref; @@ -337,8 +338,9 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) static Ref read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) { - Ref ref = push_node(reader, SERD_LITERAL, "", 0); - while (!reader->status) { + Ref ref = push_node(reader, SERD_LITERAL, "", 0); + SerdStatus st = SERD_SUCCESS; + while (!reader->status && !(st && reader->strict)) { const uint8_t c = peek_byte(reader); uint32_t code = 0; switch (c) { @@ -359,7 +361,7 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) eat_byte_check(reader, q); return ref; } else { - read_character(reader, ref, flags, eat_byte_safe(reader, c)); + st = read_character(reader, ref, flags, eat_byte_safe(reader, c)); } } } @@ -615,13 +617,14 @@ static Ref read_IRIREF(SerdReader* reader) { TRY_RET(eat_byte_check(reader, '<')); - Ref ref = push_node(reader, SERD_URI, "", 0); + Ref ref = push_node(reader, SERD_URI, "", 0); + SerdStatus st = SERD_SUCCESS; if (!fancy_syntax(reader) && !read_IRIREF_scheme(reader, ref)) { return pop_node(reader, ref); } uint32_t code = 0; - while (!reader->status) { + while (!reader->status && !(st && reader->strict)) { const uint8_t c = eat_byte_safe(reader, peek_byte(reader)); switch (c) { case '"': case '<': case '^': case '`': case '{': case '|': case '}': @@ -660,11 +663,11 @@ read_IRIREF(SerdReader* reader) push_byte(reader, ref, c); } else if (!(c & 0x80)) { push_byte(reader, ref, c); - } else if (read_utf8_character(reader, ref, c)) { + } else if ((st = read_utf8_character(reader, ref, c))) { if (reader->strict) { + reader->status = SERD_FAILURE; return pop_node(reader, ref); } - reader->status = SERD_FAILURE; } } } diff --git a/tests/good/manifest.ttl b/tests/good/manifest.ttl index dd83a7e5..9dccde7b 100644 --- a/tests/good/manifest.ttl +++ b/tests/good/manifest.ttl @@ -18,7 +18,6 @@ <#test-30> <#test-a-without-whitespace> <#test-backspace> - <#test-bad-utf8> <#test-base-query> <#test-blank-cont> <#test-blank-in-list> @@ -109,12 +108,6 @@ mf:action ; mf:result . -<#test-bad-utf8> - rdf:type rdft:TestTurtleEval ; - mf:name "test-bad-utf8" ; - mf:action ; - mf:result . - <#test-base-query> rdf:type rdft:TestTurtleEval ; mf:name "test-base-query" ; diff --git a/tests/good/test-bad-utf8.nt b/tests/good/test-bad-utf8.nt deleted file mode 100644 index 6d73993d..00000000 --- a/tests/good/test-bad-utf8.nt +++ /dev/null @@ -1,3 +0,0 @@ - "Impossible bytes: \uFFFD \uFFFD" . - "2 continuation bytes: \uFFFD" . - "Missing continuation: \uFFFD" . diff --git a/tests/good/test-bad-utf8.ttl b/tests/good/test-bad-utf8.ttl deleted file mode 100644 index 2c105f5b..00000000 --- a/tests/good/test-bad-utf8.ttl +++ /dev/null @@ -1,3 +0,0 @@ - "Impossible bytes: þ ÿ" . - "2 continuation bytes: €¿" . - "Missing continuation: À" . diff --git a/tests/lax/manifest.ttl b/tests/lax/manifest.ttl new file mode 100644 index 00000000..e3b11ff4 --- /dev/null +++ b/tests/lax/manifest.ttl @@ -0,0 +1,25 @@ +@prefix mf: . +@prefix rdf: . +@prefix rdfs: . +@prefix rdft: . + +<> + rdf:type mf:Manifest ; + rdfs:comment "Serd lax syntax test cases" ; + mf:entries ( + <#test-bad-uri> + <#test-bad-utf8> + ) . + +<#test-bad-uri> + rdf:type rdft:TestTurtleNegativeSyntax ; + mf:name "test-bad-uri" ; + mf:action ; + mf:result . + +<#test-bad-utf8> + rdf:type rdft:TestTurtleNegativeSyntax ; + mf:name "test-bad-utf8" ; + mf:action ; + mf:result . + diff --git a/tests/lax/test-bad-uri.nt b/tests/lax/test-bad-uri.nt new file mode 100644 index 00000000..7458e12a --- /dev/null +++ b/tests/lax/test-bad-uri.nt @@ -0,0 +1,3 @@ + . + . + . diff --git a/tests/lax/test-bad-uri.ttl b/tests/lax/test-bad-uri.ttl new file mode 100644 index 00000000..8f11b1d7 --- /dev/null +++ b/tests/lax/test-bad-uri.ttl @@ -0,0 +1,4 @@ + . + . + . + . diff --git a/tests/lax/test-bad-utf8.nt b/tests/lax/test-bad-utf8.nt new file mode 100644 index 00000000..09200e9d --- /dev/null +++ b/tests/lax/test-bad-utf8.nt @@ -0,0 +1,6 @@ + "Impossible bytes: \uFFFD \uFFFD" . + "2 continuation bytes: \uFFFD" . + "Missing continuation: \uFFFD" . + "Impossible bytes: \uFFFD \uFFFD" . + "2 continuation bytes: \uFFFD" . + "Missing continuation: \uFFFD" . diff --git a/tests/lax/test-bad-utf8.ttl b/tests/lax/test-bad-utf8.ttl new file mode 100644 index 00000000..526a171f --- /dev/null +++ b/tests/lax/test-bad-utf8.ttl @@ -0,0 +1,6 @@ + "Impossible bytes: þ ÿ" . + "2 continuation bytes: €¿" . + "Missing continuation: À" . + """Impossible bytes: þ ÿ""" . + """2 continuation bytes: €¿""" . + """Missing continuation: À""" . diff --git a/wscript b/wscript index c34caeab..aa9fe3b1 100644 --- a/wscript +++ b/wscript @@ -357,7 +357,8 @@ def test_suite(ctx, base_uri, testdir, report, isyntax, options=[]): expected=expected_return, name=action) - if result and ((mf + 'result') in model[test]): + if (result and expected_return == 0 and + ((mf + 'result') in model[test])): # Check output against test suite check_uri = model[test][mf + 'result'][0] check_path = ctx.src_path(file_uri_to_path(check_uri)) @@ -373,21 +374,18 @@ def test_suite(ctx, base_uri, testdir, report, isyntax, options=[]): if report is not None: report.write(earl_assertion(test, result, asserter)) - # Run lax test - check([command[0]] + ['-l'] + command[1:], - expected=None, name=action + ' lax') - ns_rdftest = 'http://www.w3.org/ns/rdftest#' for test_class, instances in instances.items(): if test_class.startswith(ns_rdftest): - expected = 1 if 'Negative' in test_class else 0 + expected = 1 if '-l' not in options and 'Negative' in test_class else 0 run_tests(test_class, instances, expected) def test(tst): import tempfile # Create test output directories - for i in ['bad', 'good', 'TurtleTests', 'NTriplesTests', 'NQuadsTests', 'TriGTests']: + for i in ['bad', 'good', 'lax', + 'TurtleTests', 'NTriplesTests', 'NQuadsTests', 'TriGTests']: try: test_dir = os.path.join('tests', i) os.makedirs(test_dir) @@ -472,6 +470,8 @@ def test(tst): serd_base = 'http://drobilla.net/sw/serd/tests/' test_suite(tst, serd_base + 'good/', 'good', None, 'Turtle') test_suite(tst, serd_base + 'bad/', 'bad', None, 'Turtle') + test_suite(tst, serd_base + 'lax/', 'lax', None, 'Turtle', ['-l']) + test_suite(tst, serd_base + 'lax/', 'lax', None, 'Turtle') # Standard test suites with open('earl.ttl', 'w') as report: -- cgit v1.2.1