From 9facb914c72bf4d0473b49faa797cdb5c9faf68b Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 29 Mar 2024 06:55:05 -0400 Subject: Fix lax NQuads parsing --- src/n3.c | 14 ++++--- test/extra/lax/manifest.ttl | 68 ++++++++++++++++++++++++++------- test/extra/lax/test-bad-string-out.nt | 1 + test/extra/lax/test-bad-string.nq | 3 ++ test/extra/lax/test-bad-string.nt | 2 + test/extra/lax/test-bad-uri-nq-out.nq | 4 ++ test/extra/lax/test-bad-uri-out.nt | 4 ++ test/extra/lax/test-bad-uri.nq | 4 ++ test/extra/lax/test-bad-uri.nt | 4 +- test/extra/lax/test-bad-uri.ttl | 9 +++-- test/extra/lax/test-bad-utf8-nq-out.nq | 3 ++ test/extra/lax/test-bad-utf8-nt-out.nt | 3 ++ test/extra/lax/test-bad-utf8-ttl-out.nt | 6 +++ test/extra/lax/test-bad-utf8.nq | 3 ++ test/extra/lax/test-bad-utf8.nt | 9 ++--- 15 files changed, 107 insertions(+), 30 deletions(-) create mode 100644 test/extra/lax/test-bad-string-out.nt create mode 100644 test/extra/lax/test-bad-string.nq create mode 100644 test/extra/lax/test-bad-uri-nq-out.nq create mode 100644 test/extra/lax/test-bad-uri-out.nt create mode 100644 test/extra/lax/test-bad-uri.nq create mode 100644 test/extra/lax/test-bad-utf8-nq-out.nq create mode 100644 test/extra/lax/test-bad-utf8-nt-out.nt create mode 100644 test/extra/lax/test-bad-utf8-ttl-out.nt create mode 100644 test/extra/lax/test-bad-utf8.nq diff --git a/src/n3.c b/src/n3.c index da3862db..6684fae8 100644 --- a/src/n3.c +++ b/src/n3.c @@ -1783,13 +1783,17 @@ read_nquads_statement(SerdReader* const reader) SerdStatus read_nquadsDoc(SerdReader* const reader) { - SerdStatus st = SERD_SUCCESS; - - while (!reader->source.eof && !st) { - st = read_nquads_statement(reader); + while (!reader->source.eof) { + const SerdStatus st = read_nquads_statement(reader); + if (st > SERD_FAILURE) { + if (reader->strict) { + return st; + } + serd_reader_skip_until_byte(reader, '\n'); + } } - return st; + return SERD_SUCCESS; } #if defined(__clang__) && __clang_major__ >= 10 diff --git a/test/extra/lax/manifest.ttl b/test/extra/lax/manifest.ttl index a04a74f6..b9890e14 100644 --- a/test/extra/lax/manifest.ttl +++ b/test/extra/lax/manifest.ttl @@ -6,32 +6,74 @@ a mf:Manifest ; rdfs:comment "Serd lax parsing test suite" ; mf:entries ( - <#test-bad-string> - <#test-bad-uri> - <#test-bad-utf8> + <#test-bad-string-nq> + <#test-bad-string-nt> + <#test-bad-string-ttl> + <#test-bad-uri-nq> + <#test-bad-uri-nt> + <#test-bad-uri-ttl> + <#test-bad-utf8-nq> + <#test-bad-utf8-nt> + <#test-bad-utf8-ttl> <#test-lone-list> ) . -<#test-bad-string> +<#test-bad-string-nq> + a rdft:TestNQuadsNegativeSyntax ; + mf:action ; + mf:name "test-bad-string-nq" ; + mf:result . + +<#test-bad-string-nt> + a rdft:TestNTriplesNegativeSyntax ; + mf:action ; + mf:name "test-bad-string-nt" ; + mf:result . + +<#test-bad-string-ttl> a rdft:TestTurtleNegativeSyntax ; mf:action ; - mf:name "test-bad-string" ; - mf:result . + mf:name "test-bad-string-ttl" ; + mf:result . + +<#test-bad-uri-nq> + a rdft:TestNQuadsNegativeSyntax ; + mf:action ; + mf:name "test-bad-uri-nq" ; + mf:result . + +<#test-bad-uri-nt> + a rdft:TestNTriplesNegativeSyntax ; + mf:action ; + mf:name "test-bad-uri-nt" ; + mf:result . -<#test-bad-uri> +<#test-bad-uri-ttl> a rdft:TestTurtleNegativeSyntax ; mf:action ; - mf:name "test-bad-uri" ; - mf:result . + mf:name "test-bad-uri-ttl" ; + mf:result . -<#test-bad-utf8> +<#test-bad-utf8-nq> + a rdft:TestNQuadsNegativeSyntax ; + mf:action ; + mf:name "test-bad-utf8-nq" ; + mf:result . + +<#test-bad-utf8-nt> + a rdft:TestNTriplesNegativeSyntax ; + mf:action ; + mf:name "test-bad-utf8-nt" ; + mf:result . + +<#test-bad-utf8-ttl> a rdft:TestTurtleNegativeSyntax ; mf:action ; - mf:name "test-bad-utf8" ; - mf:result . + mf:name "test-bad-utf8-ttl" ; + mf:result . <#test-lone-list> a rdft:TestTurtleNegativeSyntax ; - mf:name "test-lone-list" ; mf:action ; + mf:name "test-lone-list" ; mf:result . diff --git a/test/extra/lax/test-bad-string-out.nt b/test/extra/lax/test-bad-string-out.nt new file mode 100644 index 00000000..24f80a2e --- /dev/null +++ b/test/extra/lax/test-bad-string-out.nt @@ -0,0 +1 @@ + "Good" . diff --git a/test/extra/lax/test-bad-string.nq b/test/extra/lax/test-bad-string.nq new file mode 100644 index 00000000..72eb9621 --- /dev/null +++ b/test/extra/lax/test-bad-string.nq @@ -0,0 +1,3 @@ + "Truncated line + "Bad escape \? " . + "Good" . diff --git a/test/extra/lax/test-bad-string.nt b/test/extra/lax/test-bad-string.nt index 24f80a2e..72eb9621 100644 --- a/test/extra/lax/test-bad-string.nt +++ b/test/extra/lax/test-bad-string.nt @@ -1 +1,3 @@ + "Truncated line + "Bad escape \? " . "Good" . diff --git a/test/extra/lax/test-bad-uri-nq-out.nq b/test/extra/lax/test-bad-uri-nq-out.nq new file mode 100644 index 00000000..e14f3a08 --- /dev/null +++ b/test/extra/lax/test-bad-uri-nq-out.nq @@ -0,0 +1,4 @@ + . + . + . + . diff --git a/test/extra/lax/test-bad-uri-out.nt b/test/extra/lax/test-bad-uri-out.nt new file mode 100644 index 00000000..8cb00ba7 --- /dev/null +++ b/test/extra/lax/test-bad-uri-out.nt @@ -0,0 +1,4 @@ + . + . + . + . diff --git a/test/extra/lax/test-bad-uri.nq b/test/extra/lax/test-bad-uri.nq new file mode 100644 index 00000000..8f11b1d7 --- /dev/null +++ b/test/extra/lax/test-bad-uri.nq @@ -0,0 +1,4 @@ + . + . + . + . diff --git a/test/extra/lax/test-bad-uri.nt b/test/extra/lax/test-bad-uri.nt index 8cb00ba7..8f11b1d7 100644 --- a/test/extra/lax/test-bad-uri.nt +++ b/test/extra/lax/test-bad-uri.nt @@ -1,4 +1,4 @@ - . + . . - . + . . diff --git a/test/extra/lax/test-bad-uri.ttl b/test/extra/lax/test-bad-uri.ttl index 8f11b1d7..1a724fd1 100644 --- a/test/extra/lax/test-bad-uri.ttl +++ b/test/extra/lax/test-bad-uri.ttl @@ -1,4 +1,5 @@ - . - . - . - . + + , + ; + ; + . diff --git a/test/extra/lax/test-bad-utf8-nq-out.nq b/test/extra/lax/test-bad-utf8-nq-out.nq new file mode 100644 index 00000000..554cf199 --- /dev/null +++ b/test/extra/lax/test-bad-utf8-nq-out.nq @@ -0,0 +1,3 @@ + "Impossible bytes: � �" . + "2 continuation bytes: �" . + "Missing continuation: �" . diff --git a/test/extra/lax/test-bad-utf8-nt-out.nt b/test/extra/lax/test-bad-utf8-nt-out.nt new file mode 100644 index 00000000..8cefa258 --- /dev/null +++ b/test/extra/lax/test-bad-utf8-nt-out.nt @@ -0,0 +1,3 @@ + "Impossible bytes: \uFFFD \uFFFD" . + "2 continuation bytes: \uFFFD" . + "Missing continuation: \uFFFD" . diff --git a/test/extra/lax/test-bad-utf8-ttl-out.nt b/test/extra/lax/test-bad-utf8-ttl-out.nt new file mode 100644 index 00000000..58f2c52b --- /dev/null +++ b/test/extra/lax/test-bad-utf8-ttl-out.nt @@ -0,0 +1,6 @@ + "Impossible bytes: \uFFFD \uFFFD" . + "2 continuation bytes: \uFFFD" . + "Missing continuation: \uFFFD" . + "Impossible bytes: \uFFFD \uFFFD" . + "2 continuation bytes: \uFFFD" . + "Missing continuation: \uFFFD" . diff --git a/test/extra/lax/test-bad-utf8.nq b/test/extra/lax/test-bad-utf8.nq new file mode 100644 index 00000000..b8c04637 --- /dev/null +++ b/test/extra/lax/test-bad-utf8.nq @@ -0,0 +1,3 @@ + "Impossible bytes: þ ÿ" . + "2 continuation bytes: €¿" . + "Missing continuation: À" . diff --git a/test/extra/lax/test-bad-utf8.nt b/test/extra/lax/test-bad-utf8.nt index 58f2c52b..b8c04637 100644 --- a/test/extra/lax/test-bad-utf8.nt +++ b/test/extra/lax/test-bad-utf8.nt @@ -1,6 +1,3 @@ - "Impossible bytes: \uFFFD \uFFFD" . - "2 continuation bytes: \uFFFD" . - "Missing continuation: \uFFFD" . - "Impossible bytes: \uFFFD \uFFFD" . - "2 continuation bytes: \uFFFD" . - "Missing continuation: \uFFFD" . + "Impossible bytes: þ ÿ" . + "2 continuation bytes: €¿" . + "Missing continuation: À" . -- cgit v1.2.1