aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--meson.build1
-rw-r--r--src/n3.c92
-rw-r--r--src/read_nquads.c124
-rw-r--r--src/read_nquads.h29
-rw-r--r--src/read_ntriples.c13
-rw-r--r--src/read_ntriples.h10
-rw-r--r--src/reader.c6
-rw-r--r--src/reader.h6
-rw-r--r--test/extra/lax/manifest.ttl7
-rw-r--r--test/extra/lax/test-bad-string.nq3
10 files changed, 178 insertions, 113 deletions
diff --git a/meson.build b/meson.build
index cd6c77f5..df3f237f 100644
--- a/meson.build
+++ b/meson.build
@@ -157,6 +157,7 @@ sources = files(
'src/env.c',
'src/n3.c',
'src/node.c',
+ 'src/read_nquads.c',
'src/read_ntriples.c',
'src/read_utf8.c',
'src/reader.c',
diff --git a/src/n3.c b/src/n3.c
index 2c64ab0f..dc133e69 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -23,12 +23,6 @@
#include <stdio.h>
#include <string.h>
-static bool
-fancy_syntax(const SerdReader* const reader)
-{
- return reader->syntax == SERD_TURTLE || reader->syntax == SERD_TRIG;
-}
-
static SerdStatus
read_collection(SerdReader* reader, ReadContext ctx, SerdNode** dest);
@@ -145,11 +139,6 @@ read_String(SerdReader* const reader, SerdNode* const node)
return SERD_SUCCESS;
}
- if (!fancy_syntax(reader)) {
- return r_err(
- reader, SERD_BAD_SYNTAX, "syntax does not support long literals");
- }
-
skip_byte(reader, q3);
return read_STRING_LITERAL_LONG(reader, node, (uint8_t)q1);
}
@@ -315,10 +304,6 @@ read_PN_PREFIX(SerdReader* const reader, SerdNode* const dest)
static SerdStatus
read_IRIREF(SerdReader* const reader, SerdNode** const dest)
{
- if (!fancy_syntax(reader)) {
- return read_IRI(reader, dest);
- }
-
SerdStatus st = SERD_SUCCESS;
TRY(st, eat_byte_check(reader, '<'));
@@ -589,17 +574,6 @@ read_object(SerdReader* const reader,
bool simple = (ctx->subject != 0);
SerdNode* o = 0;
const int c = peek_byte(reader);
- if (!fancy_syntax(reader)) {
- switch (c) {
- case '"':
- case ':':
- case '<':
- case '_':
- break;
- default:
- return r_err(reader, SERD_BAD_SYNTAX, "expected: ':', '<', or '_'");
- }
- }
switch (c) {
case EOF:
@@ -672,11 +646,9 @@ read_object(SerdReader* const reader,
}
}
+ ctx->object = o;
if (!st && emit && simple && o) {
st = emit_statement(reader, *ctx, o);
- } else if (!st && !emit) {
- ctx->object = o;
- return SERD_SUCCESS;
}
serd_stack_pop_to(&reader->stack, orig_stack_size);
@@ -1128,65 +1100,3 @@ read_turtleTrigDoc(SerdReader* const reader)
return SERD_SUCCESS;
}
-
-SerdStatus
-read_nquads_statement(SerdReader* const reader)
-{
- SerdStatus st = SERD_SUCCESS;
- SerdStatementFlags flags = 0;
- ReadContext ctx = {0, 0, 0, 0, &flags};
- bool ate_dot = false;
- int s_type = 0;
-
- read_ws_star(reader);
- if (peek_byte(reader) == EOF) {
- return SERD_FAILURE;
- }
-
- if (peek_byte(reader) == '@') {
- return r_err(reader, SERD_BAD_SYNTAX, "syntax does not support directives");
- }
-
- // subject predicate object
- if ((st = read_subject(reader, ctx, &ctx.subject, &s_type)) ||
- !read_ws_star(reader) || (st = read_IRIREF(reader, &ctx.predicate)) ||
- !read_ws_star(reader) ||
- (st = read_object(reader, &ctx, false, &ate_dot))) {
- return st;
- }
-
- if (!ate_dot) { // graphLabel?
- read_ws_star(reader);
- switch (peek_byte(reader)) {
- case '.':
- break;
- case '_':
- TRY(st, read_BLANK_NODE_LABEL(reader, &ctx.graph, &ate_dot));
- break;
- default:
- TRY(st, read_IRIREF(reader, &ctx.graph));
- }
-
- // Terminating '.'
- read_ws_star(reader);
- TRY(st, eat_byte_check(reader, '.'));
- }
-
- return emit_statement(reader, ctx, ctx.object);
-}
-
-SerdStatus
-read_nquadsDoc(SerdReader* const reader)
-{
- SerdStatus st = SERD_SUCCESS;
-
- while (!reader->source.eof && !st) {
- const size_t orig_stack_size = reader->stack.size;
-
- st = read_nquads_statement(reader);
-
- serd_stack_pop_to(&reader->stack, orig_stack_size);
- }
-
- return st;
-}
diff --git a/src/read_nquads.c b/src/read_nquads.c
new file mode 100644
index 00000000..b4e200d4
--- /dev/null
+++ b/src/read_nquads.c
@@ -0,0 +1,124 @@
+// Copyright 2011-2021 David Robillard <d@drobilla.net>
+// SPDX-License-Identifier: ISC
+
+#include "read_nquads.h"
+
+#include "caret.h"
+#include "node.h"
+#include "read_ntriples.h"
+#include "reader.h"
+#include "stack.h"
+#include "statement.h"
+#include "try.h"
+
+#include "serd/caret.h"
+#include "serd/node.h"
+#include "serd/sink.h"
+#include "serd/statement.h"
+
+#include <stdbool.h>
+#include <stdio.h>
+
+/// [6] graphLabel: read a graph label into `dest` by delegating to read_nt_subject() and returning its status
+static SerdStatus
+read_graphLabel(SerdReader* const reader, SerdNode** const dest)
+{
+ return read_nt_subject(reader, dest); // Equivalent rule: a graph label is read with the NTriples subject reader
+}
+
+/// [2] statement: read subject, predicate, object and an optional graph label, then write one statement to the sink
+static SerdStatus
+read_nquads_statement(SerdReader* const reader)
+{
+ SerdStatementFlags flags = 0;
+ ReadContext ctx = {0, 0, 0, 0, &flags};
+ SerdStatus st = SERD_SUCCESS;
+ bool ate_dot = false; // Passed to read_nt_object(); presumably set when it consumes the final '.' itself
+
+ // Read subject and predicate, returning the first non-success status
+ if ((st = read_nt_subject(reader, &ctx.subject)) ||
+ (st = skip_horizontal_whitespace(reader)) ||
+ (st = read_nt_predicate(reader, &ctx.predicate)) ||
+ (st = skip_horizontal_whitespace(reader))) {
+ return st;
+ }
+
+ // Preserve the caret (captured before the object is read) for error reporting and read object
+ SerdCaret orig_caret = reader->source.caret;
+ if ((st = read_nt_object(reader, &ctx.object, &ate_dot)) ||
+ (st = skip_horizontal_whitespace(reader))) {
+ return st;
+ }
+
+ if (!ate_dot) { // The terminating '.' still has to be found
+ if (peek_byte(reader) == '.') { // No graph label: the statement ends here
+ eat_byte(reader);
+ } else { // Otherwise a graph label must come before the '.'
+ TRY(st, read_graphLabel(reader, &ctx.graph));
+ skip_horizontal_whitespace(reader); // NOTE(review): status discarded here, unlike the checked calls above; confirm this is intentional
+ TRY(st, eat_byte_check(reader, '.'));
+ }
+ }
+
+ serd_node_zero_pad(ctx.object); // NOTE(review): helper defined elsewhere; presumably pads the object node before it reaches the sink; confirm
+ const SerdStatement statement = {
+ {ctx.subject, ctx.predicate, ctx.object, ctx.graph}, &orig_caret}; // Statement carries the caret saved above
+
+ return serd_sink_write_statement(reader->sink, *ctx.flags, &statement); // The sink's status is the result
+}
+
+SerdStatus
+read_nquads_line(SerdReader* const reader)
+{ // Reads one line: a blank line, a comment, or a statement with an optional trailing comment
+ const size_t orig_stack_size = reader->stack.size; // Remembered so the stack can be restored after a statement
+ SerdStatus st = SERD_SUCCESS;
+
+ skip_horizontal_whitespace(reader); // Leading whitespace is skipped; the returned status is not inspected
+
+ switch (peek_byte(reader)) {
+ case EOF:
+ return SERD_FAILURE; // No more input
+
+ case '\n':
+ case '\r':
+ return read_EOL(reader); // Line holds nothing but its line ending
+
+ case '#':
+ return read_comment(reader); // Line starts with a comment
+
+ default: // Anything else is read as a statement
+ if (!(st = read_nquads_statement(reader))) {
+ skip_horizontal_whitespace(reader);
+ if (peek_byte(reader) == '#') { // A comment may follow the statement on the same line
+ st = read_comment(reader);
+ }
+ }
+ break;
+ }
+
+ serd_stack_pop_to(&reader->stack, orig_stack_size); // Restore the stack to its size at entry
+
+ return (st || peek_byte(reader) == EOF) ? st : read_EOL(reader); // On success, read the line ending unless input ended first
+}
+
+SerdStatus
+read_nquadsDoc(SerdReader* const reader)
+{
+ // Read the first line; SERD_FAILURE or a status that tolerate_status() rejects is returned as-is
+ SerdStatus st = read_nquads_line(reader);
+ if (st == SERD_FAILURE || !tolerate_status(reader, st)) { // NOTE(review): a tolerated error here enters the loop below without skipping to the next newline; confirm intended
+ return st;
+ }
+
+ // Continue reading lines for as long as possible, stopping at the first status that is not cleared below
+ for (st = SERD_SUCCESS; !st;) {
+ st = read_nquads_line(reader);
+ if (st > SERD_FAILURE && !reader->strict && tolerate_status(reader, st)) { // Recover only when not strict and the status is tolerated
+ serd_reader_skip_until_byte(reader, '\n'); // Presumably discards the rest of the bad line; helper is defined elsewhere
+ st = SERD_SUCCESS;
+ }
+ }
+
+ // If we made it this far, we succeeded at reading at least one line
+ return st > SERD_FAILURE ? st : SERD_SUCCESS; // Statuses above SERD_FAILURE are returned; anything else becomes SERD_SUCCESS
+}
diff --git a/src/read_nquads.h b/src/read_nquads.h
new file mode 100644
index 00000000..571c6f96
--- /dev/null
+++ b/src/read_nquads.h
@@ -0,0 +1,29 @@
+// Copyright 2011-2021 David Robillard <d@drobilla.net>
+// SPDX-License-Identifier: ISC
+
+#ifndef SERD_SRC_READ_NQUADS_H
+#define SERD_SRC_READ_NQUADS_H
+
+#include "serd/reader.h"
+#include "serd/status.h"
+
+// Nonterminals
+
+/**
+ Read a single NQuads line.
+
+ May read a statement, but may also just skip some input like comments or
+ extra whitespace. Returns SERD_FAILURE when there is no more input.
+*/
+SerdStatus
+read_nquads_line(SerdReader* reader);
+
+/**
+ Read a complete NQuads document, one line at a time via read_nquads_line().
+
+ RDF 1.1 NQuads: [1] nquadsDoc
+*/
+SerdStatus
+read_nquadsDoc(SerdReader* reader);
+
+#endif // SERD_SRC_READ_NQUADS_H
diff --git a/src/read_ntriples.c b/src/read_ntriples.c
index 3063a667..3d3af2ce 100644
--- a/src/read_ntriples.c
+++ b/src/read_ntriples.c
@@ -191,13 +191,18 @@ read_IRIREF_suffix(SerdReader* const reader, SerdNode* const node)
return tolerate_status(reader, st) ? SERD_SUCCESS : st;
}
-SerdStatus
+/**
+ Read an absolute IRI.
+
+ This is a stricter subset of [8] IRIREF in the NTriples grammar, since a
+ scheme is required. Handling this in the parser results in better error
+ messages.
+*/
+static SerdStatus
read_IRI(SerdReader* const reader, SerdNode** const dest)
{
SerdStatus st = SERD_SUCCESS;
- if ((st = eat_byte_check(reader, '<'))) {
- return st;
- }
+ TRY(st, eat_byte_check(reader, '<'));
if (!(*dest = push_node(reader, SERD_URI, "", 0))) {
return SERD_BAD_STACK;
diff --git a/src/read_ntriples.h b/src/read_ntriples.h
index bc76fed6..e6051fde 100644
--- a/src/read_ntriples.h
+++ b/src/read_ntriples.h
@@ -56,16 +56,6 @@ SerdStatus
read_EOL(SerdReader* reader);
/**
- Read an absolute IRI.
-
- This is a stricter subset of [8] IRIREF in the NTriples grammar, since a
- scheme is required. Handling this in the parser results in better error
- messages.
-*/
-SerdStatus
-read_IRI(SerdReader* reader, SerdNode** dest);
-
-/**
Read an IRI reference suffix into an existing node.
RDF 1.1 NTriples: [8] IRIREF
diff --git a/src/reader.c b/src/reader.c
index 057f1d7f..1c99b033 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -6,6 +6,7 @@
#include "byte_source.h"
#include "namespaces.h"
#include "node.h"
+#include "read_nquads.h"
#include "read_ntriples.h"
#include "stack.h"
#include "statement.h"
@@ -362,11 +363,12 @@ serd_reader_read_chunk(SerdReader* const reader)
if (peek_byte(reader) == 0) {
// Skip leading null byte, for reading from a null-delimited socket
- st = skip_byte(reader, 0);
+ serd_byte_source_advance(&reader->source);
+ return SERD_FAILURE;
}
return st ? st
- : (reader->syntax == SERD_NQUADS) ? read_nquads_statement(reader)
+ : (reader->syntax == SERD_NQUADS) ? read_nquads_line(reader)
: read_n3_statement(reader);
}
diff --git a/src/reader.h b/src/reader.h
index 559c9cee..0907d4cc 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -91,12 +91,6 @@ SerdStatus
read_n3_statement(SerdReader* reader);
SerdStatus
-read_nquads_statement(SerdReader* reader);
-
-SerdStatus
-read_nquadsDoc(SerdReader* reader);
-
-SerdStatus
read_turtleTrigDoc(SerdReader* reader);
static inline int
diff --git a/test/extra/lax/manifest.ttl b/test/extra/lax/manifest.ttl
index c68f0176..9291b7b3 100644
--- a/test/extra/lax/manifest.ttl
+++ b/test/extra/lax/manifest.ttl
@@ -6,6 +6,7 @@
a mf:Manifest ;
rdfs:comment "Serd lax parsing test suite" ;
mf:entries (
+ <#test-bad-string-nq>
<#test-bad-string-nt>
<#test-bad-string-ttl>
<#test-bad-uri-nq>
@@ -18,6 +19,12 @@
<#test-out-of-range-unicode>
) .
+<#test-bad-string-nq>
+ a rdft:TestNQuadsNegativeSyntax ;
+ mf:action <test-bad-string.nq> ;
+ mf:name "test-bad-string-nq" ;
+ mf:result <test-bad-string-out.nq> .
+
<#test-bad-string-nt>
a rdft:TestNTriplesNegativeSyntax ;
mf:action <test-bad-string.nt> ;
diff --git a/test/extra/lax/test-bad-string.nq b/test/extra/lax/test-bad-string.nq
new file mode 100644
index 00000000..72eb9621
--- /dev/null
+++ b/test/extra/lax/test-bad-string.nq
@@ -0,0 +1,3 @@
+<http://example.org/s1> <http://example.org/p1> "Truncated line
+<http://example.org/s1> <http://example.org/p1> "Bad escape \? " .
+<http://example.org/s1> <http://example.org/p2> "Good" .