aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2021-06-29 18:16:44 -0400
committer David Robillard <d@drobilla.net> 2022-01-14 19:37:51 -0500
commit daf4a7574d1977567c3da3d7fa561710139eb052 (patch)
tree bd07e1354ef80aa287e3f5ea963dd6360a02c134
parent 8f6d68365e0dccba13c588dd4180ea18fc9cda09 (diff)
download serd-daf4a7574d1977567c3da3d7fa561710139eb052.tar.gz
serd-daf4a7574d1977567c3da3d7fa561710139eb052.tar.bz2
serd-daf4a7574d1977567c3da3d7fa561710139eb052.zip
Factor out NQuads reader
-rw-r--r-- meson.build 1
-rw-r--r-- src/n3.c 100
-rw-r--r-- src/read_nquads.c 144
-rw-r--r-- src/read_nquads.h 32
-rw-r--r-- src/read_ntriples.c 16
-rw-r--r-- src/read_ntriples.h 10
-rw-r--r-- src/reader.c 1
-rw-r--r-- src/reader.h 3
-rw-r--r-- test/lax/manifest.ttl 7
-rw-r--r-- test/lax/test-bad-string.nq 3
10 files changed, 200 insertions, 117 deletions
diff --git a/meson.build b/meson.build
index 07786b4b..c292ebb1 100644
--- a/meson.build
+++ b/meson.build
@@ -91,6 +91,7 @@ sources = [
'src/n3.c',
'src/node.c',
'src/nodes.c',
+ 'src/read_nquads.c',
'src/read_ntriples.c',
'src/read_utf8.c',
'src/reader.c',
diff --git a/src/n3.c b/src/n3.c
index 777c83d3..c2ed8f72 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -32,12 +32,6 @@
#include <stdio.h>
#include <string.h>
-static bool
-fancy_syntax(const SerdReader* const reader)
-{
- return reader->syntax == SERD_TURTLE || reader->syntax == SERD_TRIG;
-}
-
static SerdStatus
read_collection(SerdReader* reader, ReadContext ctx, SerdNode** dest);
@@ -156,11 +150,6 @@ read_String(SerdReader* const reader, SerdNode* const node)
return SERD_SUCCESS;
}
- if (!fancy_syntax(reader)) {
- return r_err(
- reader, SERD_ERR_BAD_SYNTAX, "syntax does not support long literals");
- }
-
eat_byte_safe(reader, q3);
return read_STRING_LITERAL_LONG(reader, node, (uint8_t)q1);
}
@@ -386,10 +375,6 @@ resolve_IRIREF(SerdReader* const reader,
static SerdStatus
read_IRIREF(SerdReader* const reader, SerdNode** const dest)
{
- if (!fancy_syntax(reader)) {
- return read_IRI(reader, dest);
- }
-
SerdStatus st = SERD_SUCCESS;
if ((st = eat_byte_check(reader, '<'))) {
return st;
@@ -707,20 +692,6 @@ read_object(SerdReader* const reader,
bool simple = (ctx->subject != 0);
SerdNode* o = 0;
const int c = peek_byte(reader);
- if (!fancy_syntax(reader)) {
- switch (c) {
- case '"':
- case ':':
- case '<':
- case '_':
- break;
- case '$':
- case '?':
- break;
- default:
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected: ':', '<', or '_'");
- }
- }
switch (c) {
case EOF:
@@ -800,11 +771,9 @@ read_object(SerdReader* const reader,
}
}
+ ctx->object = o;
if (!ret && emit && simple && o) {
ret = emit_statement(reader, *ctx, o);
- } else if (!ret && !emit) {
- ctx->object = o;
- return SERD_SUCCESS;
}
serd_stack_pop_to(&reader->stack, orig_stack_size);
@@ -1279,70 +1248,3 @@ read_turtleTrigDoc(SerdReader* const reader)
return SERD_SUCCESS;
}
-
-SerdStatus
-read_nquadsDoc(SerdReader* const reader)
-{
- SerdStatus st = SERD_SUCCESS;
- while (!st && !reader->source->eof) {
- const size_t orig_stack_size = reader->stack.size;
-
- SerdStatementFlags flags = 0;
- ReadContext ctx = {0, 0, 0, 0, &flags};
- bool ate_dot = false;
- int s_type = 0;
- read_ws_star(reader);
- if (peek_byte(reader) == EOF) {
- break;
- }
-
- if (peek_byte(reader) == '@') {
- r_err(reader, SERD_ERR_BAD_SYNTAX, "syntax does not support directives");
- return SERD_ERR_BAD_SYNTAX;
- }
-
- if ((st = read_subject(reader, ctx, &ctx.subject, &s_type)) ||
- !read_ws_star(reader)) {
- return st;
- }
-
- switch (peek_byte(reader)) {
- case '$':
- case '?':
- st = read_Var(reader, &ctx.predicate);
- break;
- case '<':
- st = read_IRIREF(reader, &ctx.predicate);
- break;
- }
-
- if (st || !read_ws_star(reader) ||
- (st = read_object(reader, &ctx, false, &ate_dot))) {
- return st;
- }
-
- if (!ate_dot) { // graphLabel?
- read_ws_star(reader);
- switch (peek_byte(reader)) {
- case '.':
- break;
- case '?':
- TRY(st, read_Var(reader, &ctx.graph));
- break;
- case '_':
- TRY(st, read_BLANK_NODE_LABEL(reader, &ctx.graph, &ate_dot));
- break;
- default:
- TRY(st, read_IRIREF(reader, &ctx.graph));
- }
-
- // Terminating '.'
- read_ws_star(reader);
- TRY(st, eat_byte_check(reader, '.'));
- }
-
- st = emit_statement(reader, ctx, ctx.object);
- serd_stack_pop_to(&reader->stack, orig_stack_size);
- }
- return st;
-}
diff --git a/src/read_nquads.c b/src/read_nquads.c
new file mode 100644
index 00000000..b3f7a033
--- /dev/null
+++ b/src/read_nquads.c
@@ -0,0 +1,144 @@
+/*
+ Copyright 2011-2021 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "read_nquads.h"
+
+#include "byte_source.h"
+#include "caret.h"
+#include "node.h"
+#include "read_ntriples.h"
+#include "reader.h"
+#include "stack.h"
+#include "statement.h"
+
+#include "serd/serd.h"
+
+#include <stdbool.h>
+#include <stdio.h>
+
+/// [6] graphLabel
+static SerdStatus
+read_graphLabel(SerdReader* const reader, SerdNode** const dest)
+{
+ return read_nt_subject(reader, dest); // Equivalent rule
+}
+
+/// [2] statement
+static SerdStatus
+read_statement(SerdReader* const reader)
+{
+ SerdStatementFlags flags = 0;
+ ReadContext ctx = {0, 0, 0, 0, &flags};
+ SerdStatus st = SERD_SUCCESS;
+ bool ate_dot = false;
+
+ // Read subject and predicate
+ if ((st = read_nt_subject(reader, &ctx.subject)) ||
+ (st = skip_horizontal_whitespace(reader)) ||
+ (st = read_nt_predicate(reader, &ctx.predicate)) ||
+ (st = skip_horizontal_whitespace(reader))) {
+ return st;
+ }
+
+ // Preserve the caret for error reporting and read object
+ SerdCaret orig_caret = reader->source->caret;
+ if ((st = read_nt_object(reader, &ctx.object, &ate_dot)) ||
+ (st = skip_horizontal_whitespace(reader))) {
+ return st;
+ }
+
+ if (!ate_dot) {
+ if (peek_byte(reader) == '.') {
+ eat_byte(reader);
+ } else {
+ if ((st = read_graphLabel(reader, &ctx.graph))) {
+ return st;
+ }
+
+ skip_horizontal_whitespace(reader);
+ if ((st = eat_byte_check(reader, '.'))) {
+ return st;
+ }
+ }
+ }
+
+ serd_node_zero_pad(ctx.object);
+ const SerdStatement statement = {
+ {ctx.subject, ctx.predicate, ctx.object, ctx.graph}, &orig_caret};
+
+ return serd_sink_write_statement(reader->sink, *ctx.flags, &statement);
+}
+
+static SerdStatus
+read_line(SerdReader* const reader)
+{
+ SerdStatus st = SERD_SUCCESS;
+
+ skip_horizontal_whitespace(reader);
+
+ switch (peek_byte(reader)) {
+ case EOF:
+ return SERD_FAILURE;
+
+ case '\n':
+ case '\r':
+ return read_EOL(reader);
+
+ case '#':
+ st = read_comment(reader);
+ break;
+
+ default:
+ if (!(st = read_statement(reader))) {
+ skip_horizontal_whitespace(reader);
+ if (peek_byte(reader) == '#') {
+ st = read_comment(reader);
+ }
+ }
+ break;
+ }
+
+ return (st || peek_byte(reader) == EOF) ? st : read_EOL(reader);
+}
+
+/// [1] nquadsDoc
+SerdStatus
+read_nquadsDoc(SerdReader* const reader)
+{
+ // Record the initial stack size and read the first line
+ const size_t orig_stack_size = reader->stack.size;
+ SerdStatus st = read_line(reader);
+
+ // Return early if we failed to read anything at all
+ serd_stack_pop_to(&reader->stack, orig_stack_size);
+ if (st == SERD_FAILURE || !tolerate_status(reader, st)) {
+ return st;
+ }
+
+ // Continue reading lines for as long as possible
+ for (st = SERD_SUCCESS; !st;) {
+ st = read_line(reader);
+ serd_stack_pop_to(&reader->stack, orig_stack_size);
+
+ if (st > SERD_FAILURE && !reader->strict && tolerate_status(reader, st)) {
+ skip_until(reader, '\n');
+ st = SERD_SUCCESS;
+ }
+ }
+
+ // If we made it this far, we succeeded at reading at least one line
+ return st > SERD_FAILURE ? st : SERD_SUCCESS;
+}
diff --git a/src/read_nquads.h b/src/read_nquads.h
new file mode 100644
index 00000000..ca18cb4b
--- /dev/null
+++ b/src/read_nquads.h
@@ -0,0 +1,32 @@
+/*
+ Copyright 2011-2021 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef SERD_READ_NQUADS_H
+#define SERD_READ_NQUADS_H
+
+#include "serd/serd.h"
+
+// Nonterminals
+
+/**
+ Read a complete NQuads document.
+
+ RDF 1.1 NQuads: [1] nquadsDoc
+*/
+SerdStatus
+read_nquadsDoc(SerdReader* reader);
+
+#endif // SERD_READ_NQUADS_H
diff --git a/src/read_ntriples.c b/src/read_ntriples.c
index a0982cfa..78e46634 100644
--- a/src/read_ntriples.c
+++ b/src/read_ntriples.c
@@ -178,18 +178,24 @@ read_IRIREF_suffix(SerdReader* const reader, SerdNode* const node)
return tolerate_status(reader, st) ? SERD_SUCCESS : st;
}
-SerdStatus
+/**
+ Read an absolute IRI.
+
+ This is a stricter subset of [8] IRIREF in the NTriples grammar, since a
+ scheme is required. Handling this in the parser results in better error
+ messages.
+*/
+static SerdStatus
read_IRI(SerdReader* const reader, SerdNode** const dest)
{
- SerdStatus st = SERD_SUCCESS;
- if ((st = eat_byte_check(reader, '<'))) {
- return st;
- }
+ assert(peek_byte(reader) == '<');
+ eat_byte(reader);
if (!(*dest = push_node(reader, SERD_URI, "", 0))) {
return SERD_ERR_OVERFLOW;
}
+ SerdStatus st = SERD_SUCCESS;
if ((st = read_IRI_scheme(reader, *dest))) {
return r_err(reader, st, "expected IRI scheme");
}
diff --git a/src/read_ntriples.h b/src/read_ntriples.h
index d3a74924..a2f6d232 100644
--- a/src/read_ntriples.h
+++ b/src/read_ntriples.h
@@ -75,16 +75,6 @@ SerdStatus
read_EOL(SerdReader* reader);
/**
- Read an absolute IRI.
-
- This is a stricter subset of [8] IRIREF in the NTriples grammar, since a
- scheme is required. Handling this in the parser results in better error
- messages.
-*/
-SerdStatus
-read_IRI(SerdReader* reader, SerdNode** dest);
-
-/**
Read an IRI reference suffix into an existing node.
RDF 1.1 NTriples: [8] IRIREF
diff --git a/src/reader.c b/src/reader.c
index 5cd4f955..6ce40452 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -19,6 +19,7 @@
#include "byte_source.h"
#include "namespaces.h"
#include "node.h"
+#include "read_nquads.h"
#include "read_ntriples.h"
#include "stack.h"
#include "statement.h"
diff --git a/src/reader.h b/src/reader.h
index d9347dee..30fd69a9 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -101,9 +101,6 @@ SerdStatus
read_n3_statement(SerdReader* reader);
SerdStatus
-read_nquadsDoc(SerdReader* reader);
-
-SerdStatus
read_turtleTrigDoc(SerdReader* reader);
static inline int
diff --git a/test/lax/manifest.ttl b/test/lax/manifest.ttl
index 0d370f42..4342471c 100644
--- a/test/lax/manifest.ttl
+++ b/test/lax/manifest.ttl
@@ -9,6 +9,7 @@
mf:entries (
<#test-bad-string-ttl>
<#test-bad-string-nt>
+ <#test-bad-string-nq>
<#test-bad-uri-ttl>
<#test-bad-uri-nt>
<#test-bad-uri-nq>
@@ -31,6 +32,12 @@
mf:action <test-bad-string.nt> ;
mf:result <test-bad-string-out.nt> .
+<#test-bad-string-nq>
+ rdf:type rdft:TestNQuadsNegativeSyntax ;
+ mf:name "test-bad-string-nq" ;
+ mf:action <test-bad-string.nq> ;
+ mf:result <test-bad-string-out.nq> .
+
<#test-bad-uri-ttl>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "test-bad-uri-ttl" ;
diff --git a/test/lax/test-bad-string.nq b/test/lax/test-bad-string.nq
new file mode 100644
index 00000000..72eb9621
--- /dev/null
+++ b/test/lax/test-bad-string.nq
@@ -0,0 +1,3 @@
+<http://example.org/s1> <http://example.org/p1> "Truncated line
+<http://example.org/s1> <http://example.org/p1> "Bad escape \? " .
+<http://example.org/s1> <http://example.org/p2> "Good" .