aboutsummaryrefslogtreecommitdiffstats
path: root/src/read_trig.c
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2023-09-10 13:39:56 -0400
committer David Robillard <d@drobilla.net> 2023-12-02 18:49:07 -0500
commit d80b2a1c153f81685d026f8e09c12175abed3d03 (patch)
tree 8207788e58ee9de94ca224433a02a88e8fed3801 /src/read_trig.c
parent 76fabe903adb09e6fe4d084ff6be49115285e086 (diff)
download serd-d80b2a1c153f81685d026f8e09c12175abed3d03.tar.gz
serd-d80b2a1c153f81685d026f8e09c12175abed3d03.tar.bz2
serd-d80b2a1c153f81685d026f8e09c12175abed3d03.zip
Separate Turtle and TriG reading code
Diffstat (limited to 'src/read_trig.c')
-rw-r--r-- src/read_trig.c 203
1 files changed, 203 insertions, 0 deletions
diff --git a/src/read_trig.c b/src/read_trig.c
new file mode 100644
index 00000000..45ac39a0
--- /dev/null
+++ b/src/read_trig.c
@@ -0,0 +1,203 @@
+// Copyright 2011-2023 David Robillard <d@drobilla.net>
+// SPDX-License-Identifier: ISC
+
+#include "read_trig.h"
+#include "read_ntriples.h"
+#include "read_turtle.h"
+#include "reader.h"
+#include "stack.h"
+#include "try.h"
+
+#include "serd/node.h"
+#include "serd/reader.h"
+#include "serd/statement.h"
+#include "serd/status.h"
+
+#include <stdbool.h>
+#include <stdio.h>
+
+// Read a wrapped graph: '{', any number of subjects with their triples, '}'.
+//
+// Every subject is read into ctx->subject, and the reader stack is popped back
+// to its prior size once that subject's triples have been read.  A '.' after
+// each group of triples is skipped if present, but a '.' after the closing
+// '}' is reported as a syntax error.
+//
+// Returns SERD_SUCCESS, or the status of whichever step failed.
+static SerdStatus
+read_wrappedGraph(SerdReader* const reader, ReadContext* const ctx)
+{
+  SerdStatus st = SERD_SUCCESS;
+  TRY(st, eat_byte_check(reader, '{'));
+  read_turtle_ws_star(reader);
+
+  while (peek_byte(reader) != '}') {
+    const size_t orig_stack_size = reader->stack.size;
+    bool         ate_dot         = false;
+    int          s_type          = 0;
+
+    // Any non-success status here is an error: a subject is required
+    ctx->subject = 0;
+    if ((st = read_turtle_subject(reader, *ctx, &ctx->subject, &s_type))) {
+      return r_err(reader, st, "expected subject");
+    }
+
+    // As in read_block(), only plain "failure" is tolerated, and only for
+    // anonymous subjects.  Real errors are always reported, so that they are
+    // not silently dropped when the subject happens to start with '['.
+    st = read_turtle_triples(reader, *ctx, &ate_dot);
+    if (st > SERD_FAILURE || (st && s_type != '[')) {
+      return r_err(reader, st, "bad predicate object list");
+    }
+
+    // Discard everything pushed while reading this subject and its triples
+    serd_stack_pop_to(&reader->stack, orig_stack_size);
+
+    // The '.' between groups of triples is optional here
+    read_turtle_ws_star(reader);
+    if (peek_byte(reader) == '.') {
+      skip_byte(reader, '.');
+    }
+    read_turtle_ws_star(reader);
+  }
+
+  skip_byte(reader, '}');
+  read_turtle_ws_star(reader);
+  if (peek_byte(reader) == '.') {
+    return r_err(reader, SERD_BAD_SYNTAX, "graph followed by '.'");
+  }
+
+  return SERD_SUCCESS;
+}
+
+// Read a graph label into `dest`.
+//
+// Three forms are recognised from the next byte: an empty anonymous node
+// "[ ]" (only whitespace is allowed between the brackets), a blank node label
+// starting with '_', and otherwise whatever read_turtle_iri() accepts.
+//
+// Returns SERD_BAD_STACK if blank_id() yields no node for "[ ]", and reports
+// SERD_BAD_SYNTAX whenever read_turtle_iri() does not succeed.
+static SerdStatus
+read_labelOrSubject(SerdReader* const reader, SerdNode** const dest)
+{
+  const int first   = peek_byte(reader);
+  bool      ate_dot = false;
+
+  if (first == '_') {
+    return read_BLANK_NODE_LABEL(reader, dest, &ate_dot);
+  }
+
+  if (first != '[') {
+    // Not a blank node of either kind, so this must be an IRI
+    if (read_turtle_iri(reader, dest, &ate_dot)) {
+      return r_err(reader, SERD_BAD_SYNTAX, "expected label or subject");
+    }
+
+    return SERD_SUCCESS;
+  }
+
+  // Anonymous label: '[' optional whitespace ']'
+  SerdStatus st = SERD_SUCCESS;
+  skip_byte(reader, '[');
+  read_turtle_ws_star(reader);
+  TRY(st, eat_byte_check(reader, ']'));
+
+  *dest = blank_id(reader);
+  if (!*dest) {
+    return SERD_BAD_STACK;
+  }
+
+  return SERD_SUCCESS;
+}
+
+// Dispatch on a token that may be a SPARQL-style keyword.
+//
+// "base" and "prefix" are handed to the Turtle directive readers, and "graph"
+// reads a graph label into ctx->graph followed by a wrapped graph.
+//
+// Returns SERD_FAILURE if the token is none of these keywords, otherwise the
+// status of reading the corresponding construct.
+static SerdStatus
+read_sparql_directive(SerdReader* const     reader,
+                      ReadContext* const    ctx,
+                      const SerdNode* const token)
+{
+  if (tokcmp(token, "base", 4) == 0) {
+    return read_turtle_base(reader, true, false);
+  }
+
+  if (tokcmp(token, "prefix", 6) == 0) {
+    return read_turtle_prefixID(reader, true, false);
+  }
+
+  if (tokcmp(token, "graph", 5) != 0) {
+    return SERD_FAILURE; // Not a keyword we know about
+  }
+
+  // GRAPH label { ... }
+  SerdStatus st = SERD_SUCCESS;
+  read_turtle_ws_star(reader);
+  TRY(st, read_labelOrSubject(reader, &ctx->graph));
+  read_turtle_ws_star(reader);
+
+  return read_wrappedGraph(reader, ctx);
+}
+
+// Read one block whose kind can only be decided after its first token.
+//
+// The first token is read as if it were a subject.  Depending on how that
+// goes and on what follows, the block is then handled as a SPARQL-style
+// BASE/PREFIX/GRAPH directive, as a named graph "name { ... }", or as
+// ordinary triples, which must be terminated by a '.'.
+static SerdStatus
+read_block(SerdReader* const reader, ReadContext* const ctx)
+{
+  // Start by reading a subject, which may turn out to be something else
+  SerdNode*  token  = NULL;
+  int        s_type = 0;
+  SerdStatus st     = read_turtle_subject(reader, *ctx, &token, &s_type);
+  if (st > SERD_FAILURE) {
+    return st;
+  }
+
+  // The token wasn't a proper subject, so see if it's a directive keyword
+  if (st) {
+    st = read_sparql_directive(reader, ctx, token);
+    if (st != SERD_FAILURE) {
+      return st;
+    }
+  }
+
+  // A following '{' means the token names a graph, like "graphname { ..."
+  read_turtle_ws_star(reader);
+  if (peek_byte(reader) == '{') {
+    const bool is_collection = (s_type == '(');
+    const bool is_bare_anon  = (s_type == '[' && !*ctx->flags);
+    if (is_collection || is_bare_anon) {
+      return r_err(reader, SERD_BAD_SYNTAX, "invalid graph name");
+    }
+
+    ctx->graph = token;
+    return read_wrappedGraph(reader, ctx);
+  }
+
+  // Neither a subject, a directive, nor a graph name
+  if (st) {
+    return r_err(reader, SERD_BAD_SYNTAX, "expected directive or subject");
+  }
+
+  // The token really is a subject, so read its triples
+  bool ate_dot = false;
+  ctx->subject = token;
+  st           = read_turtle_triples(reader, *ctx, &ate_dot);
+  if (st > SERD_FAILURE) {
+    return st;
+  }
+
+  // Only an anonymous subject like "[ ... ] ." may have no triples after it
+  if (st && s_type != '[') {
+    return r_err(reader, SERD_BAD_SYNTAX, "expected triples");
+  }
+
+  // Require the terminating '.', unless it has already been consumed
+  if (ate_dot) {
+    return st;
+  }
+
+  return eat_byte_check(reader, '.');
+}
+
+// Read a single top-level TriG statement.
+//
+// After skipping whitespace, the next byte settles the easy cases: end of
+// input and a null byte both return SERD_FAILURE (the null byte is consumed
+// first), '@' starts a Turtle directive, and '{' starts a default wrapped
+// graph.  Anything else is passed to read_block() to be sorted out from its
+// first token.
+SerdStatus
+read_trig_statement(SerdReader* const reader)
+{
+  read_turtle_ws_star(reader);
+
+  const int next = peek_byte(reader);
+  if (next == EOF) {
+    return SERD_FAILURE;
+  }
+
+  if (next == '\0') {
+    eat_byte(reader);
+    return SERD_FAILURE;
+  }
+
+  if (next == '@') {
+    return read_turtle_directive(reader);
+  }
+
+  // Both remaining cases need a fresh context with its own statement flags
+  SerdStatementFlags flags = 0;
+  ReadContext        ctx   = {0, 0, 0, 0, &flags};
+
+  if (next == '{') {
+    return read_wrappedGraph(reader, &ctx);
+  }
+
+  // Nothing obvious, so decide what to read from the first token
+  return read_block(reader, &ctx);
+}
+
+// Read statements until the source reports end of input.
+//
+// The reader stack is popped back to its starting size after every
+// statement.  A status above SERD_FAILURE stops reading and is returned,
+// unless tolerate_status() accepts it, in which case input is skipped up to
+// the next newline and reading carries on.
+SerdStatus
+read_trigDoc(SerdReader* const reader)
+{
+  SerdStatus result = SERD_SUCCESS;
+
+  while (!result && !reader->source.eof) {
+    const size_t     stack_mark = reader->stack.size;
+    const SerdStatus st         = read_trig_statement(reader);
+
+    if (st > SERD_FAILURE) {
+      if (tolerate_status(reader, st)) {
+        serd_reader_skip_until_byte(reader, '\n');
+      } else {
+        result = st; // Fatal: leave the loop once the stack is restored
+      }
+    }
+
+    serd_stack_pop_to(&reader->stack, stack_mark);
+  }
+
+  return result;
+}