From c57e92c6eeb715f0caa6c4799a7a1e54e49fc62f Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sun, 9 Jun 2024 18:21:21 -0400 Subject: Treat out of range unicode characters as errors --- NEWS | 3 ++- src/n3.c | 4 ++-- test/extra/bad/manifest.ttl | 1 + test/extra/good/manifest.ttl | 7 ------- test/extra/good/test-out-of-range-unicode.nt | 1 - test/extra/good/test-out-of-range-unicode.ttl | 1 - test/extra/lax/manifest.ttl | 7 +++++++ test/extra/lax/test-out-of-range-unicode.nt | 1 + test/extra/lax/test-out-of-range-unicode.ttl | 1 + 9 files changed, 14 insertions(+), 12 deletions(-) delete mode 100644 test/extra/good/test-out-of-range-unicode.nt delete mode 100644 test/extra/good/test-out-of-range-unicode.ttl create mode 100644 test/extra/lax/test-out-of-range-unicode.nt create mode 100644 test/extra/lax/test-out-of-range-unicode.ttl diff --git a/NEWS b/NEWS index fc35155c..f051f9ee 100644 --- a/NEWS +++ b/NEWS @@ -4,8 +4,9 @@ serd (0.32.3) unstable; urgency=medium * Fix library current_version on MacOS * Fix parsing NQuads lines with no space before the final dot * Support reading lone lists in lax mode + * Treat out of range unicode characters as errors - -- David Robillard Fri, 29 Mar 2024 13:36:36 +0000 + -- David Robillard Sun, 09 Jun 2024 22:15:14 +0000 serd (0.32.2) stable; urgency=medium diff --git a/src/n3.c b/src/n3.c index 6684fae8..39b00d13 100644 --- a/src/n3.c +++ b/src/n3.c @@ -97,7 +97,7 @@ read_UCHAR(SerdReader* const reader, const Ref dest, uint32_t* const char_code) code); push_bytes(reader, dest, replacement_char, 3); *char_code = 0xFFFD; - return SERD_SUCCESS; + return reader->strict ? SERD_ERR_BAD_SYNTAX : SERD_SUCCESS; } // Build output in buf @@ -325,7 +325,7 @@ read_string_escape(SerdReader* const reader, uint32_t code = 0; if ((st = read_ECHAR(reader, ref, flags)) && (st = read_UCHAR(reader, ref, &code))) { - return r_err(reader, st, "invalid escape '\\%c'\n", peek_byte(reader)); + return r_err(reader, st, "expected string escape sequence\n"); } return st; diff --git a/test/extra/bad/manifest.ttl b/test/extra/bad/manifest.ttl index 7e35e5ff..dfb84ce9 100644 --- a/test/extra/bad/manifest.ttl +++ b/test/extra/bad/manifest.ttl @@ -13,6 +13,7 @@ <#bad-blank-node-label> <#bad-blank-predicate> <#bad-blank-syntax> + <#bad-blank-syntax> <#bad-bom> <#bad-char-in-local> <#bad-char-in-prefix> diff --git a/test/extra/good/manifest.ttl b/test/extra/good/manifest.ttl index 3b911990..c464d700 100644 --- a/test/extra/good/manifest.ttl +++ b/test/extra/good/manifest.ttl @@ -48,7 +48,6 @@ <#test-nt-syntax-eol-crlf> <#test-nt-syntax-eol-lf> <#test-nt-syntax-eol-lfcr> - <#test-out-of-range-unicode> <#test-prefix> <#test-quote-escapes> <#test-rel> @@ -295,12 +294,6 @@ mf:action ; mf:name "test-nt-syntax-eol-lfcr" . -<#test-out-of-range-unicode> - a rdft:TestTurtleEval ; - mf:action ; - mf:name "test-out-of-range-unicode" ; - mf:result . - <#test-prefix> a rdft:TestTurtleEval ; mf:action ; diff --git a/test/extra/good/test-out-of-range-unicode.nt b/test/extra/good/test-out-of-range-unicode.nt deleted file mode 100644 index 5def9e31..00000000 --- a/test/extra/good/test-out-of-range-unicode.nt +++ /dev/null @@ -1 +0,0 @@ - "\uFFFD" . diff --git a/test/extra/good/test-out-of-range-unicode.ttl b/test/extra/good/test-out-of-range-unicode.ttl deleted file mode 100644 index 7e64785a..00000000 --- a/test/extra/good/test-out-of-range-unicode.ttl +++ /dev/null @@ -1 +0,0 @@ - "\U00110000" . diff --git a/test/extra/lax/manifest.ttl b/test/extra/lax/manifest.ttl index bf51f931..f79e1984 100644 --- a/test/extra/lax/manifest.ttl +++ b/test/extra/lax/manifest.ttl @@ -19,6 +19,7 @@ <#test-bad-utf8-ttl> <#test-bad-utf8-trig> <#test-lone-list> + <#test-out-of-range-unicode> ) . <#test-bad-string-nq> @@ -98,3 +99,9 @@ mf:action ; mf:name "test-lone-list" ; mf:result . + +<#test-out-of-range-unicode> + a rdft:TestTurtleNegativeSyntax ; + mf:action ; + mf:name "test-out-of-range-unicode" ; + mf:result . diff --git a/test/extra/lax/test-out-of-range-unicode.nt b/test/extra/lax/test-out-of-range-unicode.nt new file mode 100644 index 00000000..5def9e31 --- /dev/null +++ b/test/extra/lax/test-out-of-range-unicode.nt @@ -0,0 +1 @@ + "\uFFFD" . diff --git a/test/extra/lax/test-out-of-range-unicode.ttl b/test/extra/lax/test-out-of-range-unicode.ttl new file mode 100644 index 00000000..7e64785a --- /dev/null +++ b/test/extra/lax/test-out-of-range-unicode.ttl @@ -0,0 +1 @@ + "\U00110000" . -- cgit v1.2.1