From 7f508b89230b339684a4013dbbad741d9e16e40a Mon Sep 17 00:00:00 2001
From: David Robillard
Date: Wed, 18 Dec 2019 19:09:49 -0500
Subject: Add support for parsing variables

This adds a reader flag and serdi option for extending a syntax with
support for SPARQL-like variables, for storing things like patterns or
simple queries.

A variable is written as '?' or '$' followed by a name, where the name
grammar is simplified from SPARQL's VARNAME.  Variables are accepted as
a subject, predicate, or object, and only when SERD_READ_VARIABLES is
set (serdi -x); otherwise read_Var() reports a syntax error.  The sigil
is not stored in the node, and the writer always emits a leading '?'.

In the restricted syntaxes (where fancy_syntax() is false), an object
starting with '$' or '?' falls through to the existing "expected" error
unless the flag is set.  In read_nquadsDoc(), any predicate that is not
a variable is still handed to read_IRIREF(), so a malformed predicate
is reported as it was before variables were supported.
---
Review notes (not part of the commit message):
 * Line breaks and leading indentation were restored from a flattened
   copy of this patch; confirm the whitespace of context lines against
   the tree before applying.
 * The post-image blob id on the src/n3.c index line predates the
   changes described in the last paragraph of the message above.

 src/env.c    |  1 +
 src/n3.c     | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 src/node.c   |  3 ++-
 src/reader.c |  1 +
 src/reader.h |  1 +
 src/serdi.c  |  3 +++
 src/writer.c | 14 ++++++++++++
 7 files changed, 90 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/env.c b/src/env.c
index 5cbb6a43..3abbc857 100644
--- a/src/env.c
+++ b/src/env.c
@@ -371,6 +371,7 @@ serd_env_expand(const SerdEnv* env, const SerdNode* node)
 	case SERD_CURIE:
 		return expand_curie(env, node);
 	case SERD_BLANK:
+	case SERD_VARIABLE:
 		return serd_node_copy(node);
 	}
 }
diff --git a/src/n3.c b/src/n3.c
index 4fe0363c..39cd515f 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -865,11 +865,51 @@ read_literal(SerdReader* reader, SerdNode** dest, bool* ate_dot)
 	return SERD_SUCCESS;
 }
 
+static SerdStatus
+read_VARNAME(SerdReader* reader, SerdNode** dest)
+{
+	// Simplified from SPARQL: VARNAME ::= (PN_CHARS_U | [0-9])+
+	SerdNode* n = *dest;
+	SerdStatus st = SERD_SUCCESS;
+	int c = 0;
+
+	while ((c = peek_byte(reader))) {
+		if (is_digit(c) || c == '_') {
+			push_byte(reader, n, eat_byte_safe(reader, c));
+		} else if ((st = read_PN_CHARS(reader, n))) {
+			st = st > SERD_FAILURE ? st : SERD_SUCCESS;
+			break;
+		}
+	}
+
+	return st;
+}
+
+static SerdStatus
+read_Var(SerdReader* reader, SerdNode** dest)
+{
+	if (!(reader->flags & SERD_READ_VARIABLES)) {
+		return r_err(reader, SERD_ERR_BAD_SYNTAX,
+		             "syntax does not support variables\n");
+	} else if (!(*dest = push_node(reader, SERD_VARIABLE, "", 0))) {
+		return SERD_ERR_OVERFLOW;
+	}
+
+	assert(peek_byte(reader) == '$' || peek_byte(reader) == '?');
+	serd_byte_source_advance(&reader->source);
+
+	return read_VARNAME(reader, dest);
+}
+
 static SerdStatus
 read_verb(SerdReader* reader, SerdNode** dest)
 {
 	const size_t orig_stack_size = reader->stack.size;
-	if (peek_byte(reader) == '<') {
+
+	switch (peek_byte(reader)) {
+	case '$': case '?':
+		return read_Var(reader, dest);
+	case '<':
 		return read_IRIREF(reader, dest);
 	}
 
@@ -1030,7 +1070,13 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
 	const int c = peek_byte(reader);
 	if (!fancy_syntax(reader)) {
 		switch (c) {
-		case '"': case ':': case '<': case '_': break;
+		case '"': case ':': case '<': case '_':
+			break;
+		case '$': case '?':
+			if (reader->flags & SERD_READ_VARIABLES) {
+				break;
+			}
+			// fallthrough
 		default:
 			return r_err(reader, SERD_ERR_BAD_SYNTAX,
 			             "expected: ':', '<', or '_'\n");
@@ -1040,6 +1086,9 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
 	switch (c) {
 	case EOF: case ')':
 		return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected object\n");
+	case '$': case '?':
+		ret = read_Var(reader, &o);
+		break;
 	case '[':
 		simple = false;
 		ret = read_anon(reader, *ctx, false, &o);
@@ -1241,6 +1290,9 @@ read_subject(SerdReader* reader, ReadContext ctx, SerdNode** dest, int* s_type)
 	SerdStatus st = SERD_SUCCESS;
 	bool ate_dot = false;
 	switch ((*s_type = peek_byte(reader))) {
+	case '$': case '?':
+		st = read_Var(reader, dest);
+		break;
 	case '[':
 		read_anon(reader, ctx, true, dest);
 		break;
@@ -1558,11 +1610,21 @@ read_nquadsDoc(SerdReader* reader)
 			return SERD_ERR_BAD_SYNTAX;
 		}
 
-		// subject predicate object
 		if ((st = read_subject(reader, ctx, &ctx.subject, &s_type)) ||
-		    !read_ws_star(reader) ||
-		    (st = read_IRIREF(reader, &ctx.predicate)) ||
-		    !read_ws_star(reader) ||
+		    !read_ws_star(reader)) {
+			return st;
+		}
+
+		switch (peek_byte(reader)) {
+		case '$': case '?':
+			st = read_Var(reader, &ctx.predicate);
+			break;
+		default:
+			st = read_IRIREF(reader, &ctx.predicate);
+			break;
+		}
+
+		if (st || !read_ws_star(reader) ||
 		    (st = read_object(reader, &ctx, false, &ate_dot))) {
 			return st;
 		}
diff --git a/src/node.c b/src/node.c
index b33a03e9..66964556 100644
--- a/src/node.c
+++ b/src/node.c
@@ -156,7 +156,8 @@ serd_new_simple_node(SerdNodeType type, const char* str, const size_t len)
 {
 	if (!str) {
 		return NULL;
-	} else if (type != SERD_BLANK && type != SERD_CURIE && type != SERD_URI) {
+	} else if (type != SERD_BLANK && type != SERD_CURIE && type != SERD_URI &&
+	           type != SERD_VARIABLE) {
 		return NULL;
 	}
 
diff --git a/src/reader.c b/src/reader.c
index 668cd38d..3dd2f465 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -172,6 +172,7 @@ serd_reader_new(SerdWorld* world,
 	me->sink = sink;
 	me->stack = serd_stack_new(stack_size, serd_node_align);
 	me->syntax = syntax;
+	me->flags = flags;
 	me->next_id = 1;
 	me->strict = !(flags & SERD_READ_LAX);
 
diff --git a/src/reader.h b/src/reader.h
index 21dc2e04..a5e1f3f0 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -60,6 +60,7 @@ struct SerdReaderImpl {
 	SerdByteSource source;
 	SerdStack stack;
 	SerdSyntax syntax;
+	SerdReaderFlags flags;
 	unsigned next_id;
 	uint8_t* buf;
 	char* bprefix;
diff --git a/src/serdi.c b/src/serdi.c
index 7f4880fd..1339e593 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -72,6 +72,7 @@ print_usage(const char* name, bool error)
 	fprintf(os, " -s INPUT Parse INPUT as string.\n");
 	fprintf(os, " -t Write terser output without newlines.\n");
 	fprintf(os, " -v Display version information and exit.\n");
+	fprintf(os, " -x Support parsing variable nodes like `?x'.\n");
 	return error ? 1 : 0;
 }
 
@@ -228,6 +229,8 @@ main(int argc, char** argv)
 				return missing_arg(argv[0], 'r');
 			}
 			root_uri = argv[a];
+		} else if (argv[a][1] == 'x') {
+			reader_flags |= SERD_READ_VARIABLES;
 		} else {
 			SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1);
 			return print_usage(argv[0], true);
diff --git a/src/writer.c b/src/writer.c
index acb63d7d..d3326647 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -772,6 +772,18 @@ write_blank(SerdWriter* const writer,
 	return st;
 }
 
+SERD_WARN_UNUSED_RESULT static SerdStatus
+write_variable(SerdWriter* const writer, const SerdNode* node)
+{
+	SerdStatus st = SERD_SUCCESS;
+
+	TRY(st, esink("?", 1, writer));
+	TRY(st, esink(serd_node_string(node), node->n_bytes, writer));
+
+	writer->last_sep = SEP_NONE;
+	return st;
+}
+
 SERD_WARN_UNUSED_RESULT static SerdStatus
 write_node(SerdWriter* writer,
            const SerdNode* node,
@@ -787,6 +799,8 @@ write_node(SerdWriter* writer,
 		return write_curie(writer, node, field, flags);
 	case SERD_BLANK:
 		return write_blank(writer, node, field, flags);
+	case SERD_VARIABLE:
+		return write_variable(writer, node);
 	}
 
 	return SERD_ERR_INTERNAL;
-- 
cgit v1.2.1