From fc3e5a0a7c9c64d275fec498f0e80dc02444c12d Mon Sep 17 00:00:00 2001
From: David Robillard
Date: Wed, 18 Dec 2019 19:09:49 -0500
Subject: Add support for parsing variables

This adds a reader flag and serdi option for extending a syntax with support for
SPARQL-like variables, for storing things like patterns or simple queries.
---
 src/env.c    |  1 +
 src/n3.c     | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 src/node.c   |  3 ++-
 src/reader.c |  1 +
 src/reader.h |  1 +
 src/serdi.c  |  3 +++
 src/writer.c | 17 ++++++++++++
 7 files changed, 108 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/env.c b/src/env.c
index 55901e26..a11abc70 100644
--- a/src/env.c
+++ b/src/env.c
@@ -355,6 +355,7 @@ serd_env_expand(const SerdEnv* env, const SerdNode* node)
   case SERD_CURIE:
     return expand_curie(env, node);
   case SERD_BLANK:
+  case SERD_VARIABLE:
     return serd_node_copy(node);
   }
 }
diff --git a/src/n3.c b/src/n3.c
index c9f2dd6b..1ae98a56 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -928,11 +928,64 @@ read_literal(SerdReader* reader, SerdNode** dest, bool* ate_dot)
   return SERD_SUCCESS;
 }
 
+// Read the name of a variable into the node at *dest.
+//
+// Bytes are consumed while they are digits, underscores, or accepted by
+// read_PN_CHARS().  A SERD_FAILURE from read_PN_CHARS() just marks the end of
+// the name and is reported as success; worse statuses are returned as-is.
+static SerdStatus
+read_VARNAME(SerdReader* reader, SerdNode** dest)
+{
+  // Simplified from SPARQL: VARNAME ::= (PN_CHARS_U | [0-9])+
+  SerdNode*  n  = *dest;
+  SerdStatus st = SERD_SUCCESS;
+  int        c  = 0;
+
+  while ((c = peek_byte(reader))) {
+    if (is_digit(c) || c == '_') {
+      push_byte(reader, n, eat_byte_safe(reader, c));
+    } else if ((st = read_PN_CHARS(reader, n))) {
+      st = st > SERD_FAILURE ? st : SERD_SUCCESS;
+      break;
+    }
+  }
+
+  return st;
+}
+
+// Read a variable such as "?x" or "$x" into a new SERD_VARIABLE node in *dest.
+//
+// Reports SERD_ERR_BAD_SYNTAX via r_err() unless SERD_READ_VARIABLES is set in
+// the reader flags, and returns SERD_ERR_OVERFLOW if the node can't be pushed.
+// The leading '$' or '?' is skipped and is not part of the node string.
+static SerdStatus
+read_Var(SerdReader* reader, SerdNode** dest)
+{
+  if (!(reader->flags & SERD_READ_VARIABLES)) {
+    return r_err(
+      reader, SERD_ERR_BAD_SYNTAX, "syntax does not support variables\n");
+  }
+
+  if (!(*dest = push_node(reader, SERD_VARIABLE, "", 0))) {
+    return SERD_ERR_OVERFLOW;
+  }
+
+  assert(peek_byte(reader) == '$' || peek_byte(reader) == '?');
+  serd_byte_source_advance(reader->source);
+
+  return read_VARNAME(reader, dest);
+}
+
 static SerdStatus
 read_verb(SerdReader* reader, SerdNode** dest)
 {
   const size_t orig_stack_size = reader->stack.size;
-  if (peek_byte(reader) == '<') {
+
+  switch (peek_byte(reader)) {
+  case '$':
+  case '?':
+    return read_Var(reader, dest);
+  case '<':
     return read_IRIREF(reader, dest);
   }
 
@@ -1110,6 +1163,10 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
     case '<':
     case '_':
       break;
+    case '$':
+    case '?':
+      // Let read_Var() accept or reject this based on the reader flags
+      break;
     default:
       return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected: ':', '<', or '_'\n");
     }
@@ -1119,6 +1176,10 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
   case EOF:
   case ')':
     return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected object\n");
+  case '$':
+  case '?':
+    ret = read_Var(reader, &o);
+    break;
   case '[':
     simple = false;
     ret    = read_anon(reader, *ctx, false, &o);
@@ -1341,6 +1402,10 @@ read_subject(SerdReader* reader, ReadContext ctx, SerdNode** dest, int* s_type)
   SerdStatus st      = SERD_SUCCESS;
   bool       ate_dot = false;
   switch ((*s_type = peek_byte(reader))) {
+  case '$':
+  case '?':
+    st = read_Var(reader, dest);
+    break;
   case '[':
     st = read_anon(reader, ctx, true, dest);
     break;
@@ -1598,6 +1663,7 @@ read_n3_statement(SerdReader* reader)
       if (s_type == '(' || (s_type == '[' && !*ctx.flags)) {
         return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid graph name\n");
       }
+
       ctx.graph   = ctx.subject;
       ctx.subject = NULL;
       TRY(st, read_wrappedGraph(reader, &ctx));
@@ -1613,6 +1679,7 @@ read_n3_statement(SerdReader* reader)
       }
 
       return st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX;
+
     } else if (!ate_dot) {
       read_ws_star(reader);
       st = (eat_byte_check(reader, '.') == '.') ? SERD_SUCCESS
@@ -1671,10 +1738,23 @@ read_nquadsDoc(SerdReader* reader)
       return SERD_ERR_BAD_SYNTAX;
     }
 
-    // subject predicate object
     if ((st = read_subject(reader, ctx, &ctx.subject, &s_type)) ||
-        !read_ws_star(reader) || (st = read_IRIREF(reader, &ctx.predicate)) ||
-        !read_ws_star(reader) ||
+        !read_ws_star(reader)) {
+      return st;
+    }
+
+    switch (peek_byte(reader)) {
+    case '$':
+    case '?':
+      st = read_Var(reader, &ctx.predicate);
+      break;
+    default:
+      // Everything else is parsed as an IRI, the only other valid predicate
+      st = read_IRIREF(reader, &ctx.predicate);
+      break;
+    }
+
+    if (st || !read_ws_star(reader) ||
         (st = read_object(reader, &ctx, false, &ate_dot))) {
       return st;
     }
diff --git a/src/node.c b/src/node.c
index 14f6490b..4c76ed9b 100644
--- a/src/node.c
+++ b/src/node.c
@@ -165,7 +165,8 @@ serd_node_zero_pad(SerdNode* node)
 SerdNode*
 serd_new_simple_node(SerdNodeType type, const SerdStringView str)
 {
-  if (type != SERD_BLANK && type != SERD_CURIE && type != SERD_URI) {
+  if (type != SERD_BLANK && type != SERD_CURIE && type != SERD_URI &&
+      type != SERD_VARIABLE) {
     return NULL;
   }
 
diff --git a/src/reader.c b/src/reader.c
index 1c9469af..4c4bffa5 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -172,6 +172,7 @@ serd_reader_new(SerdWorld* const world,
   me->sink    = sink;
   me->stack   = serd_stack_new(stack_size, sizeof(SerdNode));
   me->syntax  = syntax;
+  me->flags   = flags;
   me->next_id = 1;
   me->strict  = !(flags & SERD_READ_LAX);
 
diff --git a/src/reader.h b/src/reader.h
index 62feeee6..9bdbf72f 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -60,6 +60,7 @@ struct SerdReaderImpl {
   SerdByteSource* source;
   SerdStack       stack;
   SerdSyntax      syntax;
+  SerdReaderFlags flags;
   unsigned        next_id;
   uint8_t*        buf;
   char*           bprefix;
diff --git a/src/serdi.c b/src/serdi.c
index 1925cd88..2801a2da 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -72,6 +72,7 @@ print_usage(const char* name, bool error)
   fprintf(os, " -s INPUT Parse INPUT as string.\n");
   fprintf(os, " -t Write terser output without newlines.\n");
   fprintf(os, " -v Display version information and exit.\n");
+  fprintf(os, " -x Support parsing variable nodes like `?x'.\n");
   return error ? 1 : 0;
 }
 
@@ -246,6 +247,8 @@ main(int argc, char** argv)
         return missing_arg(argv[0], 'w');
       }
       out_filename = argv[a];
+    } else if (argv[a][1] == 'x') {
+      reader_flags |= SERD_READ_VARIABLES;
     } else {
       SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1);
       return print_usage(argv[0], true);
diff --git a/src/writer.c b/src/writer.c
index b1404a91..fbc9dd5d 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -880,6 +880,21 @@ write_blank(SerdWriter* const writer,
   return st;
 }
 
+// Write a variable node as '?' followed by the node's string.
+//
+// The '?' sigil is always used here, whatever sigil the input had.
+SERD_WARN_UNUSED_RESULT static SerdStatus
+write_variable(SerdWriter* const writer, const SerdNode* node)
+{
+  SerdStatus st = SERD_SUCCESS;
+
+  TRY(st, esink("?", 1, writer));
+  TRY(st, esink(serd_node_string(node), node->n_bytes, writer));
+
+  writer->last_sep = SEP_NONE;
+  return st;
+}
+
 SERD_WARN_UNUSED_RESULT static SerdStatus
 write_node(SerdWriter*        writer,
            const SerdNode*    node,
@@ -895,6 +910,8 @@ write_node(SerdWriter*        writer,
     return write_curie(writer, node, field, flags);
   case SERD_BLANK:
     return write_blank(writer, node, field, flags);
+  case SERD_VARIABLE:
+    return write_variable(writer, node);
   }
 
   return SERD_ERR_INTERNAL;
-- 
cgit v1.2.1