diff options
author | David Robillard <d@drobilla.net> | 2019-10-27 19:48:02 +0100 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2019-10-27 22:41:27 +0100 |
commit | f7ffff1e75634909da60ea63a7c52f1a001220b8 (patch) | |
tree | d14587114d96a0be8408709c1f315412440d078d | |
parent | cd6d4569c1c8819cc8e54eefdc0ac389d8efb4ea (diff) | |
download | serd-f7ffff1e75634909da60ea63a7c52f1a001220b8.tar.gz serd-f7ffff1e75634909da60ea63a7c52f1a001220b8.tar.bz2 serd-f7ffff1e75634909da60ea63a7c52f1a001220b8.zip |
Fix EOF handling while reading in bulk or from strings
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | src/byte_source.c | 2 | ||||
-rw-r--r-- | src/n3.c | 38 | ||||
-rw-r--r-- | src/serd_internal.h | 11 | ||||
-rw-r--r-- | tests/bad/bad-eof-after-quotes.ttl | 3 | ||||
-rw-r--r-- | tests/bad/bad-eof-at-string-start.ttl | 3 | ||||
-rw-r--r-- | tests/bad/bad-eof-in-long-string.ttl | 3 | ||||
-rw-r--r-- | tests/bad/bad-eof-in-uri-scheme.nt | 1 | ||||
-rw-r--r-- | tests/bad/manifest.ttl | 24 |
9 files changed, 70 insertions, 18 deletions
@@ -1,8 +1,9 @@ serd (0.30.3) unstable; + * Fix EOF handling while reading in bulk or from strings * Fix lax handling of string errors - -- David Robillard <d@drobilla.net> Sun, 27 Oct 2019 21:38:43 +0000 + -- David Robillard <d@drobilla.net> Sun, 27 Oct 2019 21:41:05 +0000 serd (0.30.2) stable; diff --git a/src/byte_source.c b/src/byte_source.c index 1a67157b..210e638e 100644 --- a/src/byte_source.c +++ b/src/byte_source.c @@ -29,6 +29,7 @@ serd_byte_source_page(SerdByteSource* source) ? SERD_ERR_UNKNOWN : SERD_FAILURE); } else if (n_read < source->page_size) { source->file_buf[n_read] = '\0'; + source->buf_size = n_read; } return SERD_SUCCESS; } @@ -47,6 +48,7 @@ serd_byte_source_open_source(SerdByteSource* source, source->stream = stream; source->from_stream = true; source->page_size = page_size; + source->buf_size = page_size; source->cur = cur; source->error_func = error_func; source->read_func = read_func; @@ -167,7 +167,7 @@ static inline SerdStatus bad_char(SerdReader* reader, const char* fmt, uint8_t c) { // Skip bytes until the next start byte - for (int b = peek_byte(reader); (b & 0x80);) { + for (int b = peek_byte(reader); b != EOF && ((uint8_t)b & 0x80);) { eat_byte_safe(reader, b); b = peek_byte(reader); } @@ -187,7 +187,7 @@ read_utf8_bytes(SerdReader* reader, uint8_t bytes[4], uint32_t* size, uint8_t c) bytes[0] = c; for (unsigned i = 1; i < *size; ++i) { const int b = peek_byte(reader); - if ((b & 0x80) == 0) { + if (b == EOF || ((uint8_t)b & 0x80) == 0) { return bad_char(reader, "invalid UTF-8 continuation 0x%X\n", (uint8_t)b); } @@ -255,7 +255,7 @@ read_comment(SerdReader* reader) { eat_byte_safe(reader, '#'); int c; - while (((c = peek_byte(reader)) != 0xA) && (c != 0xD) && c) { + while (((c = peek_byte(reader)) != 0xA) && c != 0xD && c != EOF && c) { eat_byte_safe(reader, c); } } @@ -330,6 +330,9 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) *flags |= SERD_HAS_QUOTE; push_byte(reader, ref, c); read_character(reader, ref, flags, (uint8_t)q2); + } else if (c == EOF) { + r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in long string\n"); + return pop_node(reader, ref); } else { st = read_character( reader, ref, flags, (uint8_t)eat_byte_safe(reader, c)); @@ -349,6 +352,9 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) const int c = peek_byte(reader); uint32_t code = 0; switch (c) { + case EOF: + r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in short string\n"); + return pop_node(reader, ref); case '\n': case '\r': r_err(reader, SERD_ERR_BAD_SYNTAX, "line end in short string\n"); return pop_node(reader, ref); @@ -382,13 +388,17 @@ read_String(SerdReader* reader, SerdNodeFlags* flags) eat_byte_safe(reader, q1); const int q2 = peek_byte(reader); - if (q2 != q1) { // Short string (not triple quoted) + if (q2 == EOF) { + return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n"); + } else if (q2 != q1) { // Short string (not triple quoted) return read_STRING_LITERAL(reader, flags, (uint8_t)q1); } eat_byte_safe(reader, q2); const int q3 = peek_byte(reader); - if (q3 != q1) { // Empty short string ("" or '') + if (q3 == EOF) { + return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n"); + } else if (q3 != q1) { // Empty short string ("" or '') return push_node(reader, SERD_LITERAL, "", 0); } @@ -420,7 +430,7 @@ read_PN_CHARS_BASE(SerdReader* reader, Ref dest) SerdStatus st = SERD_SUCCESS; if (is_alpha(c)) { push_byte(reader, dest, eat_byte_safe(reader, c)); - } else if (!(c & 0x80)) { + } else if (c == EOF || !(c & 0x80)) { return SERD_FAILURE; } else if ((st = read_utf8_code(reader, dest, &code, (uint8_t)eat_byte_safe(reader, c)))) { @@ -450,7 +460,7 @@ read_PN_CHARS(SerdReader* reader, Ref dest) SerdStatus st = SERD_SUCCESS; if (is_alpha(c) || is_digit(c) || c == '_' || c == '-') { push_byte(reader, dest, eat_byte_safe(reader, c)); - } else if (!(c & 0x80)) { + } else if (c == EOF || !(c & 0x80)) { return SERD_FAILURE; } else if ((st = read_utf8_code(reader, dest, &code, (uint8_t)eat_byte_safe(reader, c)))) { @@ -603,7 +613,7 @@ read_IRIREF_scheme(SerdReader* reader, Ref dest) "bad IRI scheme start `%c'\n", c); } - while ((c = peek_byte(reader))) { + while ((c = peek_byte(reader)) != EOF) { if (c == '>') { return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing IRI scheme\n"); } else if (!is_uri_scheme_char(c)) { @@ -617,7 +627,7 @@ read_IRIREF_scheme(SerdReader* reader, Ref dest) } } - return false; + return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n"); } static Ref @@ -981,7 +991,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) } } switch (c) { - case '\0': case ')': + case EOF: case '\0': case ')': return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected object\n"); case '[': simple = false; @@ -1081,7 +1091,7 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) do { read_ws_star(reader); switch (c = peek_byte(reader)) { - case 0: + case EOF: case '\0': return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n"); case '.': case ']': case '}': @@ -1362,8 +1372,7 @@ read_n3_statement(SerdReader* reader) bool ret = true; read_ws_star(reader); switch (peek_byte(reader)) { - case '\0': - reader->source.eof = true; + case EOF: case '\0': return reader->status <= SERD_FAILURE; case '@': if (!fancy_syntax(reader)) { @@ -1455,8 +1464,7 @@ read_nquadsDoc(SerdReader* reader) bool ate_dot = false; int s_type = 0; read_ws_star(reader); - if (peek_byte(reader) == '\0') { - reader->source.eof = true; + if (peek_byte(reader) == EOF) { break; } else if (peek_byte(reader) == '@') { return r_err(reader, SERD_ERR_BAD_SYNTAX, diff --git a/src/serd_internal.h b/src/serd_internal.h index eeb64687..9c58c151 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -92,6 +92,7 @@ typedef struct { SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror) void* stream; ///< Stream (e.g. FILE) size_t page_size; ///< Number of bytes to read at a time + size_t buf_size; ///< Number of bytes in file_buf Cursor cur; ///< Cursor for error reporting uint8_t* file_buf; ///< Buffer iff reading pages from a file const uint8_t* read_buf; ///< Pointer to file_buf or read_byte @@ -140,28 +141,34 @@ serd_byte_source_advance(SerdByteSource* source) SerdStatus st = SERD_SUCCESS; switch (serd_byte_source_peek(source)) { - case '\0': break; case '\n': ++source->cur.line; source->cur.col = 0; break; default: ++source->cur.col; } + const bool was_eof = source->eof; if (source->from_stream) { source->eof = false; if (source->page_size > 1) { if (++source->read_head == source->page_size) { st = serd_byte_source_page(source); + } else if (source->read_head == source->buf_size) { + source->eof = true; } } else { if (!source->read_func(&source->read_byte, 1, 1, source->stream)) { + source->eof = true; st = source->error_func(source->stream) ? SERD_ERR_UNKNOWN : SERD_FAILURE; } } } else if (!source->eof) { ++source->read_head; // Move to next character in string + if (source->read_buf[source->read_head] == '\0') { + source->eof = true; + } } - return source->eof ? SERD_FAILURE : st; + return (was_eof && source->eof) ? SERD_FAILURE : st; } /* Stack */ diff --git a/tests/bad/bad-eof-after-quotes.ttl b/tests/bad/bad-eof-after-quotes.ttl new file mode 100644 index 00000000..40e429cb --- /dev/null +++ b/tests/bad/bad-eof-after-quotes.ttl @@ -0,0 +1,3 @@ +@prefix eg: <http://example.org/> . + +<> eg:comment ""
\ No newline at end of file diff --git a/tests/bad/bad-eof-at-string-start.ttl b/tests/bad/bad-eof-at-string-start.ttl new file mode 100644 index 00000000..93d20bcc --- /dev/null +++ b/tests/bad/bad-eof-at-string-start.ttl @@ -0,0 +1,3 @@ +@prefix eg: <http://example.org/> . + +<> eg:comment "
\ No newline at end of file diff --git a/tests/bad/bad-eof-in-long-string.ttl b/tests/bad/bad-eof-in-long-string.ttl new file mode 100644 index 00000000..2ef179a8 --- /dev/null +++ b/tests/bad/bad-eof-in-long-string.ttl @@ -0,0 +1,3 @@ +@prefix eg: <http://example.org/> . + +<> eg:comment """This is the string that never ends
\ No newline at end of file diff --git a/tests/bad/bad-eof-in-uri-scheme.nt b/tests/bad/bad-eof-in-uri-scheme.nt new file mode 100644 index 00000000..de892dcf --- /dev/null +++ b/tests/bad/bad-eof-in-uri-scheme.nt @@ -0,0 +1 @@ +<http://example.org/s> <http://example.org/p> <ht
\ No newline at end of file diff --git a/tests/bad/manifest.ttl b/tests/bad/manifest.ttl index bd51ba48..4d543dd4 100644 --- a/tests/bad/manifest.ttl +++ b/tests/bad/manifest.ttl @@ -30,6 +30,8 @@ <#bad-char-in-uri> <#bad-datatype> <#bad-dot-after-subject> + <#bad-eof-after-quotes> + <#bad-eof-at-string-start> <#bad-eof-in-blank> <#bad-eof-in-escape> <#bad-eof-in-lang-suffix> @@ -38,9 +40,11 @@ <#bad-eof-in-object-list2> <#bad-eof-in-object-list> <#bad-eof-in-predicate-list> + <#bad-eof-in-long-string> <#bad-eof-in-string> <#bad-eof-in-triple-quote> <#bad-eof-in-uri> + <#bad-eof-in-uri-scheme> <#bad-escape> <#bad-ext-namedblank-op> <#bad-hex-digit> @@ -186,6 +190,16 @@ mf:name "bad-dot-after-subject" ; mf:action <bad-dot-after-subject.ttl> . +<#bad-eof-after-quotes> + rdf:type rdft:TestTurtleNegativeSyntax ; + mf:name "bad-eof-after-quotes" ; + mf:action <bad-eof-after-quotes.ttl> . + +<#bad-eof-at-string-start> + rdf:type rdft:TestTurtleNegativeSyntax ; + mf:name "bad-eof-at-string-start" ; + mf:action <bad-eof-at-string-start.ttl> . + <#bad-eof-in-blank> rdf:type rdft:TestTurtleNegativeSyntax ; mf:name "bad-eof-in-blank" ; @@ -226,6 +240,11 @@ mf:name "bad-eof-in-predicate-list" ; mf:action <bad-eof-in-predicate-list.ttl> . +<#bad-eof-in-long-string> + rdf:type rdft:TestTurtleNegativeSyntax ; + mf:name "bad-eof-in-long-string" ; + mf:action <bad-eof-in-long-string.ttl> . + <#bad-eof-in-string> rdf:type rdft:TestTurtleNegativeSyntax ; mf:name "bad-eof-in-string" ; @@ -241,6 +260,11 @@ mf:name "bad-eof-in-uri" ; mf:action <bad-eof-in-uri.ttl> . +<#bad-eof-in-uri-scheme> + rdf:type rdft:TestNTriplesNegativeSyntax ; + mf:name "bad-eof-in-uri-scheme" ; + mf:action <bad-eof-in-uri-scheme.nt> . + <#bad-escape> rdf:type rdft:TestTurtleNegativeSyntax ; mf:name "bad-escape" ; |