From daf4a7574d1977567c3da3d7fa561710139eb052 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Tue, 29 Jun 2021 18:16:44 -0400 Subject: Factor out NQuads reader --- src/n3.c | 100 +----------------------------------- src/read_nquads.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/read_nquads.h | 32 ++++++++++++ src/read_ntriples.c | 16 ++++-- src/read_ntriples.h | 10 ---- src/reader.c | 1 + src/reader.h | 3 -- 7 files changed, 189 insertions(+), 117 deletions(-) create mode 100644 src/read_nquads.c create mode 100644 src/read_nquads.h (limited to 'src') diff --git a/src/n3.c b/src/n3.c index 777c83d3..c2ed8f72 100644 --- a/src/n3.c +++ b/src/n3.c @@ -32,12 +32,6 @@ #include #include -static bool -fancy_syntax(const SerdReader* const reader) -{ - return reader->syntax == SERD_TURTLE || reader->syntax == SERD_TRIG; -} - static SerdStatus read_collection(SerdReader* reader, ReadContext ctx, SerdNode** dest); @@ -156,11 +150,6 @@ read_String(SerdReader* const reader, SerdNode* const node) return SERD_SUCCESS; } - if (!fancy_syntax(reader)) { - return r_err( - reader, SERD_ERR_BAD_SYNTAX, "syntax does not support long literals"); - } - eat_byte_safe(reader, q3); return read_STRING_LITERAL_LONG(reader, node, (uint8_t)q1); } @@ -386,10 +375,6 @@ resolve_IRIREF(SerdReader* const reader, static SerdStatus read_IRIREF(SerdReader* const reader, SerdNode** const dest) { - if (!fancy_syntax(reader)) { - return read_IRI(reader, dest); - } - SerdStatus st = SERD_SUCCESS; if ((st = eat_byte_check(reader, '<'))) { return st; @@ -707,20 +692,6 @@ read_object(SerdReader* const reader, bool simple = (ctx->subject != 0); SerdNode* o = 0; const int c = peek_byte(reader); - if (!fancy_syntax(reader)) { - switch (c) { - case '"': - case ':': - case '<': - case '_': - break; - case '$': - case '?': - break; - default: - return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected: ':', '<', or '_'"); - } - } switch (c) { case EOF: @@ -800,11 +771,9 @@ read_object(SerdReader* const reader, } } + ctx->object = o; if (!ret && emit && simple && o) { ret = emit_statement(reader, *ctx, o); - } else if (!ret && !emit) { - ctx->object = o; - return SERD_SUCCESS; } serd_stack_pop_to(&reader->stack, orig_stack_size); @@ -1279,70 +1248,3 @@ read_turtleTrigDoc(SerdReader* const reader) return SERD_SUCCESS; } - -SerdStatus -read_nquadsDoc(SerdReader* const reader) -{ - SerdStatus st = SERD_SUCCESS; - while (!st && !reader->source->eof) { - const size_t orig_stack_size = reader->stack.size; - - SerdStatementFlags flags = 0; - ReadContext ctx = {0, 0, 0, 0, &flags}; - bool ate_dot = false; - int s_type = 0; - read_ws_star(reader); - if (peek_byte(reader) == EOF) { - break; - } - - if (peek_byte(reader) == '@') { - r_err(reader, SERD_ERR_BAD_SYNTAX, "syntax does not support directives"); - return SERD_ERR_BAD_SYNTAX; - } - - if ((st = read_subject(reader, ctx, &ctx.subject, &s_type)) || - !read_ws_star(reader)) { - return st; - } - - switch (peek_byte(reader)) { - case '$': - case '?': - st = read_Var(reader, &ctx.predicate); - break; - case '<': - st = read_IRIREF(reader, &ctx.predicate); - break; - } - - if (st || !read_ws_star(reader) || - (st = read_object(reader, &ctx, false, &ate_dot))) { - return st; - } - - if (!ate_dot) { // graphLabel? - read_ws_star(reader); - switch (peek_byte(reader)) { - case '.': - break; - case '?': - TRY(st, read_Var(reader, &ctx.graph)); - break; - case '_': - TRY(st, read_BLANK_NODE_LABEL(reader, &ctx.graph, &ate_dot)); - break; - default: - TRY(st, read_IRIREF(reader, &ctx.graph)); - } - - // Terminating '.' - read_ws_star(reader); - TRY(st, eat_byte_check(reader, '.')); - } - - st = emit_statement(reader, ctx, ctx.object); - serd_stack_pop_to(&reader->stack, orig_stack_size); - } - return st; -} diff --git a/src/read_nquads.c b/src/read_nquads.c new file mode 100644 index 00000000..b3f7a033 --- /dev/null +++ b/src/read_nquads.c @@ -0,0 +1,144 @@ +/* + Copyright 2011-2021 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "read_nquads.h" + +#include "byte_source.h" +#include "caret.h" +#include "node.h" +#include "read_ntriples.h" +#include "reader.h" +#include "stack.h" +#include "statement.h" + +#include "serd/serd.h" + +#include +#include + +/// [6] graphLabel +static SerdStatus +read_graphLabel(SerdReader* const reader, SerdNode** const dest) +{ + return read_nt_subject(reader, dest); // Equivalent rule +} + +/// [2] statement +static SerdStatus +read_statement(SerdReader* const reader) +{ + SerdStatementFlags flags = 0; + ReadContext ctx = {0, 0, 0, 0, &flags}; + SerdStatus st = SERD_SUCCESS; + bool ate_dot = false; + + // Read subject and predicate + if ((st = read_nt_subject(reader, &ctx.subject)) || + (st = skip_horizontal_whitespace(reader)) || + (st = read_nt_predicate(reader, &ctx.predicate)) || + (st = skip_horizontal_whitespace(reader))) { + return st; + } + + // Preserve the caret for error reporting and read object + SerdCaret orig_caret = reader->source->caret; + if ((st = read_nt_object(reader, &ctx.object, &ate_dot)) || + (st = skip_horizontal_whitespace(reader))) { + return st; + } + + if (!ate_dot) { + if (peek_byte(reader) == '.') { + eat_byte(reader); + } else { + if ((st = read_graphLabel(reader, &ctx.graph))) { + return st; + } + + skip_horizontal_whitespace(reader); + if ((st = eat_byte_check(reader, '.'))) { + return st; + } + } + } + + serd_node_zero_pad(ctx.object); + const SerdStatement statement = { + {ctx.subject, ctx.predicate, ctx.object, ctx.graph}, &orig_caret}; + + return serd_sink_write_statement(reader->sink, *ctx.flags, &statement); +} + +static SerdStatus +read_line(SerdReader* const reader) +{ + SerdStatus st = SERD_SUCCESS; + + skip_horizontal_whitespace(reader); + + switch (peek_byte(reader)) { + case EOF: + return SERD_FAILURE; + + case '\n': + case '\r': + return read_EOL(reader); + + case '#': + st = read_comment(reader); + break; + + default: + if (!(st = read_statement(reader))) { + skip_horizontal_whitespace(reader); + if (peek_byte(reader) == '#') { + st = read_comment(reader); + } + } + break; + } + + return (st || peek_byte(reader) == EOF) ? st : read_EOL(reader); +} + +/// [1] nquadsDoc +SerdStatus +read_nquadsDoc(SerdReader* const reader) +{ + // Record the initial stack size and read the first line + const size_t orig_stack_size = reader->stack.size; + SerdStatus st = read_line(reader); + + // Return early if we failed to read anything at all + serd_stack_pop_to(&reader->stack, orig_stack_size); + if (st == SERD_FAILURE || !tolerate_status(reader, st)) { + return st; + } + + // Continue reading lines for as long as possible + for (st = SERD_SUCCESS; !st;) { + st = read_line(reader); + serd_stack_pop_to(&reader->stack, orig_stack_size); + + if (st > SERD_FAILURE && !reader->strict && tolerate_status(reader, st)) { + skip_until(reader, '\n'); + st = SERD_SUCCESS; + } + } + + // If we made it this far, we succeeded at reading at least one line + return st > SERD_FAILURE ? st : SERD_SUCCESS; +} diff --git a/src/read_nquads.h b/src/read_nquads.h new file mode 100644 index 00000000..ca18cb4b --- /dev/null +++ b/src/read_nquads.h @@ -0,0 +1,32 @@ +/* + Copyright 2011-2021 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_READ_NQUADS_H +#define SERD_READ_NQUADS_H + +#include "serd/serd.h" + +// Nonterminals + +/** + Read a complete NQuads document. + + RDF 1.1 NQuads: [1] nquadsDoc +*/ +SerdStatus +read_nquadsDoc(SerdReader* reader); + +#endif // SERD_READ_NQUADS_H diff --git a/src/read_ntriples.c b/src/read_ntriples.c index a0982cfa..78e46634 100644 --- a/src/read_ntriples.c +++ b/src/read_ntriples.c @@ -178,18 +178,24 @@ read_IRIREF_suffix(SerdReader* const reader, SerdNode* const node) return tolerate_status(reader, st) ? SERD_SUCCESS : st; } -SerdStatus +/** + Read an absolute IRI. + + This is a stricter subset of [8] IRIREF in the NTriples grammar, since a + scheme is required. Handling this in the parser results in better error + messages. +*/ +static SerdStatus read_IRI(SerdReader* const reader, SerdNode** const dest) { - SerdStatus st = SERD_SUCCESS; - if ((st = eat_byte_check(reader, '<'))) { - return st; - } + assert(peek_byte(reader) == '<'); + eat_byte(reader); if (!(*dest = push_node(reader, SERD_URI, "", 0))) { return SERD_ERR_OVERFLOW; } + SerdStatus st = SERD_SUCCESS; if ((st = read_IRI_scheme(reader, *dest))) { return r_err(reader, st, "expected IRI scheme"); } diff --git a/src/read_ntriples.h b/src/read_ntriples.h index d3a74924..a2f6d232 100644 --- a/src/read_ntriples.h +++ b/src/read_ntriples.h @@ -74,16 +74,6 @@ read_LANGTAG(SerdReader* reader); SerdStatus read_EOL(SerdReader* reader); -/** - Read an absolute IRI. - - This is a stricter subset of [8] IRIREF in the NTriples grammar, since a - scheme is required. Handling this in the parser results in better error - messages. -*/ -SerdStatus -read_IRI(SerdReader* reader, SerdNode** dest); - /** Read an IRI reference suffix into an existing node. diff --git a/src/reader.c b/src/reader.c index 5cd4f955..6ce40452 100644 --- a/src/reader.c +++ b/src/reader.c @@ -19,6 +19,7 @@ #include "byte_source.h" #include "namespaces.h" #include "node.h" +#include "read_nquads.h" #include "read_ntriples.h" #include "stack.h" #include "statement.h" diff --git a/src/reader.h b/src/reader.h index d9347dee..30fd69a9 100644 --- a/src/reader.h +++ b/src/reader.h @@ -100,9 +100,6 @@ emit_statement(SerdReader* reader, ReadContext ctx, SerdNode* o); SerdStatus read_n3_statement(SerdReader* reader); -SerdStatus -read_nquadsDoc(SerdReader* reader); - SerdStatus read_turtleTrigDoc(SerdReader* reader); -- cgit v1.2.1