about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2019-10-27 19:48:02 +0100
committer David Robillard <d@drobilla.net> 2019-10-27 22:41:27 +0100
commit f7ffff1e75634909da60ea63a7c52f1a001220b8 (patch)
tree d14587114d96a0be8408709c1f315412440d078d
parent cd6d4569c1c8819cc8e54eefdc0ac389d8efb4ea (diff)
download serd-f7ffff1e75634909da60ea63a7c52f1a001220b8.tar.gz
serd-f7ffff1e75634909da60ea63a7c52f1a001220b8.tar.bz2
serd-f7ffff1e75634909da60ea63a7c52f1a001220b8.zip
Fix EOF handling while reading in bulk or from strings
-rw-r--r-- NEWS 3
-rw-r--r-- src/byte_source.c 2
-rw-r--r-- src/n3.c 38
-rw-r--r-- src/serd_internal.h 11
-rw-r--r-- tests/bad/bad-eof-after-quotes.ttl 3
-rw-r--r-- tests/bad/bad-eof-at-string-start.ttl 3
-rw-r--r-- tests/bad/bad-eof-in-long-string.ttl 3
-rw-r--r-- tests/bad/bad-eof-in-uri-scheme.nt 1
-rw-r--r-- tests/bad/manifest.ttl 24
9 files changed, 70 insertions, 18 deletions
diff --git a/NEWS b/NEWS
index 4a27904f..b39eefa4 100644
--- a/NEWS
+++ b/NEWS
@@ -1,8 +1,9 @@
serd (0.30.3) unstable;
+ * Fix EOF handling while reading in bulk or from strings
* Fix lax handling of string errors
- -- David Robillard <d@drobilla.net> Sun, 27 Oct 2019 21:38:43 +0000
+ -- David Robillard <d@drobilla.net> Sun, 27 Oct 2019 21:41:05 +0000
serd (0.30.2) stable;
diff --git a/src/byte_source.c b/src/byte_source.c
index 1a67157b..210e638e 100644
--- a/src/byte_source.c
+++ b/src/byte_source.c
@@ -29,6 +29,7 @@ serd_byte_source_page(SerdByteSource* source)
? SERD_ERR_UNKNOWN : SERD_FAILURE);
} else if (n_read < source->page_size) {
source->file_buf[n_read] = '\0';
+ source->buf_size = n_read;
}
return SERD_SUCCESS;
}
@@ -47,6 +48,7 @@ serd_byte_source_open_source(SerdByteSource* source,
source->stream = stream;
source->from_stream = true;
source->page_size = page_size;
+ source->buf_size = page_size;
source->cur = cur;
source->error_func = error_func;
source->read_func = read_func;
diff --git a/src/n3.c b/src/n3.c
index 187f371c..b9bc58cd 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -167,7 +167,7 @@ static inline SerdStatus
bad_char(SerdReader* reader, const char* fmt, uint8_t c)
{
// Skip bytes until the next start byte
- for (int b = peek_byte(reader); (b & 0x80);) {
+ for (int b = peek_byte(reader); b != EOF && ((uint8_t)b & 0x80);) {
eat_byte_safe(reader, b);
b = peek_byte(reader);
}
@@ -187,7 +187,7 @@ read_utf8_bytes(SerdReader* reader, uint8_t bytes[4], uint32_t* size, uint8_t c)
bytes[0] = c;
for (unsigned i = 1; i < *size; ++i) {
const int b = peek_byte(reader);
- if ((b & 0x80) == 0) {
+ if (b == EOF || ((uint8_t)b & 0x80) == 0) {
return bad_char(reader, "invalid UTF-8 continuation 0x%X\n",
(uint8_t)b);
}
@@ -255,7 +255,7 @@ read_comment(SerdReader* reader)
{
eat_byte_safe(reader, '#');
int c;
- while (((c = peek_byte(reader)) != 0xA) && (c != 0xD) && c) {
+ while (((c = peek_byte(reader)) != 0xA) && c != 0xD && c != EOF && c) {
eat_byte_safe(reader, c);
}
}
@@ -330,6 +330,9 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
*flags |= SERD_HAS_QUOTE;
push_byte(reader, ref, c);
read_character(reader, ref, flags, (uint8_t)q2);
+ } else if (c == EOF) {
+ r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in long string\n");
+ return pop_node(reader, ref);
} else {
st = read_character(
reader, ref, flags, (uint8_t)eat_byte_safe(reader, c));
@@ -349,6 +352,9 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
const int c = peek_byte(reader);
uint32_t code = 0;
switch (c) {
+ case EOF:
+ r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in short string\n");
+ return pop_node(reader, ref);
case '\n': case '\r':
r_err(reader, SERD_ERR_BAD_SYNTAX, "line end in short string\n");
return pop_node(reader, ref);
@@ -382,13 +388,17 @@ read_String(SerdReader* reader, SerdNodeFlags* flags)
eat_byte_safe(reader, q1);
const int q2 = peek_byte(reader);
- if (q2 != q1) { // Short string (not triple quoted)
+ if (q2 == EOF) {
+ return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n");
+ } else if (q2 != q1) { // Short string (not triple quoted)
return read_STRING_LITERAL(reader, flags, (uint8_t)q1);
}
eat_byte_safe(reader, q2);
const int q3 = peek_byte(reader);
- if (q3 != q1) { // Empty short string ("" or '')
+ if (q3 == EOF) {
+ return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n");
+ } else if (q3 != q1) { // Empty short string ("" or '')
return push_node(reader, SERD_LITERAL, "", 0);
}
@@ -420,7 +430,7 @@ read_PN_CHARS_BASE(SerdReader* reader, Ref dest)
SerdStatus st = SERD_SUCCESS;
if (is_alpha(c)) {
push_byte(reader, dest, eat_byte_safe(reader, c));
- } else if (!(c & 0x80)) {
+ } else if (c == EOF || !(c & 0x80)) {
return SERD_FAILURE;
} else if ((st = read_utf8_code(reader, dest, &code,
(uint8_t)eat_byte_safe(reader, c)))) {
@@ -450,7 +460,7 @@ read_PN_CHARS(SerdReader* reader, Ref dest)
SerdStatus st = SERD_SUCCESS;
if (is_alpha(c) || is_digit(c) || c == '_' || c == '-') {
push_byte(reader, dest, eat_byte_safe(reader, c));
- } else if (!(c & 0x80)) {
+ } else if (c == EOF || !(c & 0x80)) {
return SERD_FAILURE;
} else if ((st = read_utf8_code(reader, dest, &code,
(uint8_t)eat_byte_safe(reader, c)))) {
@@ -603,7 +613,7 @@ read_IRIREF_scheme(SerdReader* reader, Ref dest)
"bad IRI scheme start `%c'\n", c);
}
- while ((c = peek_byte(reader))) {
+ while ((c = peek_byte(reader)) != EOF) {
if (c == '>') {
return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing IRI scheme\n");
} else if (!is_uri_scheme_char(c)) {
@@ -617,7 +627,7 @@ read_IRIREF_scheme(SerdReader* reader, Ref dest)
}
}
- return false;
+ return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n");
}
static Ref
@@ -981,7 +991,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
}
}
switch (c) {
- case '\0': case ')':
+ case EOF: case '\0': case ')':
return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected object\n");
case '[':
simple = false;
@@ -1081,7 +1091,7 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot)
do {
read_ws_star(reader);
switch (c = peek_byte(reader)) {
- case 0:
+ case EOF: case '\0':
return r_err(reader, SERD_ERR_BAD_SYNTAX,
"unexpected end of file\n");
case '.': case ']': case '}':
@@ -1362,8 +1372,7 @@ read_n3_statement(SerdReader* reader)
bool ret = true;
read_ws_star(reader);
switch (peek_byte(reader)) {
- case '\0':
- reader->source.eof = true;
+ case EOF: case '\0':
return reader->status <= SERD_FAILURE;
case '@':
if (!fancy_syntax(reader)) {
@@ -1455,8 +1464,7 @@ read_nquadsDoc(SerdReader* reader)
bool ate_dot = false;
int s_type = 0;
read_ws_star(reader);
- if (peek_byte(reader) == '\0') {
- reader->source.eof = true;
+ if (peek_byte(reader) == EOF) {
break;
} else if (peek_byte(reader) == '@') {
return r_err(reader, SERD_ERR_BAD_SYNTAX,
diff --git a/src/serd_internal.h b/src/serd_internal.h
index eeb64687..9c58c151 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -92,6 +92,7 @@ typedef struct {
SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror)
void* stream; ///< Stream (e.g. FILE)
size_t page_size; ///< Number of bytes to read at a time
+ size_t buf_size; ///< Number of bytes in file_buf
Cursor cur; ///< Cursor for error reporting
uint8_t* file_buf; ///< Buffer iff reading pages from a file
const uint8_t* read_buf; ///< Pointer to file_buf or read_byte
@@ -140,28 +141,34 @@ serd_byte_source_advance(SerdByteSource* source)
SerdStatus st = SERD_SUCCESS;
switch (serd_byte_source_peek(source)) {
- case '\0': break;
case '\n': ++source->cur.line; source->cur.col = 0; break;
default: ++source->cur.col;
}
+ const bool was_eof = source->eof;
if (source->from_stream) {
source->eof = false;
if (source->page_size > 1) {
if (++source->read_head == source->page_size) {
st = serd_byte_source_page(source);
+ } else if (source->read_head == source->buf_size) {
+ source->eof = true;
}
} else {
if (!source->read_func(&source->read_byte, 1, 1, source->stream)) {
+ source->eof = true;
st = source->error_func(source->stream) ? SERD_ERR_UNKNOWN
: SERD_FAILURE;
}
}
} else if (!source->eof) {
++source->read_head; // Move to next character in string
+ if (source->read_buf[source->read_head] == '\0') {
+ source->eof = true;
+ }
}
- return source->eof ? SERD_FAILURE : st;
+ return (was_eof && source->eof) ? SERD_FAILURE : st;
}
/* Stack */
diff --git a/tests/bad/bad-eof-after-quotes.ttl b/tests/bad/bad-eof-after-quotes.ttl
new file mode 100644
index 00000000..40e429cb
--- /dev/null
+++ b/tests/bad/bad-eof-after-quotes.ttl
@@ -0,0 +1,3 @@
+@prefix eg: <http://example.org/> .
+
+<> eg:comment ""
\ No newline at end of file
diff --git a/tests/bad/bad-eof-at-string-start.ttl b/tests/bad/bad-eof-at-string-start.ttl
new file mode 100644
index 00000000..93d20bcc
--- /dev/null
+++ b/tests/bad/bad-eof-at-string-start.ttl
@@ -0,0 +1,3 @@
+@prefix eg: <http://example.org/> .
+
+<> eg:comment "
\ No newline at end of file
diff --git a/tests/bad/bad-eof-in-long-string.ttl b/tests/bad/bad-eof-in-long-string.ttl
new file mode 100644
index 00000000..2ef179a8
--- /dev/null
+++ b/tests/bad/bad-eof-in-long-string.ttl
@@ -0,0 +1,3 @@
+@prefix eg: <http://example.org/> .
+
+<> eg:comment """This is the string that never ends
\ No newline at end of file
diff --git a/tests/bad/bad-eof-in-uri-scheme.nt b/tests/bad/bad-eof-in-uri-scheme.nt
new file mode 100644
index 00000000..de892dcf
--- /dev/null
+++ b/tests/bad/bad-eof-in-uri-scheme.nt
@@ -0,0 +1 @@
+<http://example.org/s> <http://example.org/p> <ht
\ No newline at end of file
diff --git a/tests/bad/manifest.ttl b/tests/bad/manifest.ttl
index bd51ba48..4d543dd4 100644
--- a/tests/bad/manifest.ttl
+++ b/tests/bad/manifest.ttl
@@ -30,6 +30,8 @@
<#bad-char-in-uri>
<#bad-datatype>
<#bad-dot-after-subject>
+ <#bad-eof-after-quotes>
+ <#bad-eof-at-string-start>
<#bad-eof-in-blank>
<#bad-eof-in-escape>
<#bad-eof-in-lang-suffix>
@@ -38,9 +40,11 @@
<#bad-eof-in-object-list2>
<#bad-eof-in-object-list>
<#bad-eof-in-predicate-list>
+ <#bad-eof-in-long-string>
<#bad-eof-in-string>
<#bad-eof-in-triple-quote>
<#bad-eof-in-uri>
+ <#bad-eof-in-uri-scheme>
<#bad-escape>
<#bad-ext-namedblank-op>
<#bad-hex-digit>
@@ -186,6 +190,16 @@
mf:name "bad-dot-after-subject" ;
mf:action <bad-dot-after-subject.ttl> .
+<#bad-eof-after-quotes>
+ rdf:type rdft:TestTurtleNegativeSyntax ;
+ mf:name "bad-eof-after-quotes" ;
+ mf:action <bad-eof-after-quotes.ttl> .
+
+<#bad-eof-at-string-start>
+ rdf:type rdft:TestTurtleNegativeSyntax ;
+ mf:name "bad-eof-at-string-start" ;
+ mf:action <bad-eof-at-string-start.ttl> .
+
<#bad-eof-in-blank>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "bad-eof-in-blank" ;
@@ -226,6 +240,11 @@
mf:name "bad-eof-in-predicate-list" ;
mf:action <bad-eof-in-predicate-list.ttl> .
+<#bad-eof-in-long-string>
+ rdf:type rdft:TestTurtleNegativeSyntax ;
+ mf:name "bad-eof-in-long-string" ;
+ mf:action <bad-eof-in-long-string.ttl> .
+
<#bad-eof-in-string>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "bad-eof-in-string" ;
@@ -241,6 +260,11 @@
mf:name "bad-eof-in-uri" ;
mf:action <bad-eof-in-uri.ttl> .
+<#bad-eof-in-uri-scheme>
+ rdf:type rdft:TestNTriplesNegativeSyntax ;
+ mf:name "bad-eof-in-uri-scheme" ;
+ mf:action <bad-eof-in-uri-scheme.nt> .
+
<#bad-escape>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "bad-escape" ;