diff options
field | value | date |
---|---|---|
author | David Robillard <d@drobilla.net> | 2019-12-18 19:09:49 -0500 |
committer | David Robillard <d@drobilla.net> | 2022-01-13 23:05:26 -0500 |
commit | 5d43cb36087292a397992aa1b59326fc355d5247 (patch) | |
tree | f313f87fea160cd52af3bba8c3de77c25b0519bf /src | |
parent | 55e28966226268a57edb07419ac419ef53ac437d (diff) | |
download | serd-5d43cb36087292a397992aa1b59326fc355d5247.tar.gz serd-5d43cb36087292a397992aa1b59326fc355d5247.tar.bz2 serd-5d43cb36087292a397992aa1b59326fc355d5247.zip | |
Add support for parsing variables
This adds a reader flag and serdi option for extending a syntax with support
for SPARQL-like variables, for storing things like patterns or simple queries.
Diffstat (limited to 'src')
-rw-r--r-- | src/env.c | 1 |
-rw-r--r-- | src/n3.c | 81 |
-rw-r--r-- | src/node.c | 3 |
-rw-r--r-- | src/reader.c | 1 |
-rw-r--r-- | src/reader.h | 1 |
-rw-r--r-- | src/serdi.c | 3 |
-rw-r--r-- | src/statement.c | 3 |
-rw-r--r-- | src/writer.c | 15 |
8 files changed, 101 insertions, 7 deletions
@@ -322,6 +322,7 @@ serd_env_expand(const SerdEnv* env, const SerdNode* node) case SERD_CURIE: return expand_curie(env, node); case SERD_BLANK: + case SERD_VARIABLE: break; } @@ -963,10 +963,53 @@ read_literal(SerdReader* const reader, } static SerdStatus -read_verb(SerdReader* const reader, SerdNode** const dest) +read_VARNAME(SerdReader* const reader, SerdNode** const dest) +{ + // Simplified from SPARQL: VARNAME ::= (PN_CHARS_U | [0-9])+ + SerdNode* n = *dest; + SerdStatus st = SERD_SUCCESS; + int c = 0; + peek_byte(reader); + while ((c = peek_byte(reader))) { + if (is_digit(c) || c == '_') { + st = push_byte(reader, n, eat_byte_safe(reader, c)); + } else if ((st = read_PN_CHARS(reader, n))) { + st = st > SERD_FAILURE ? st : SERD_SUCCESS; + break; + } + } + + return st; +} + +static SerdStatus +read_Var(SerdReader* const reader, SerdNode** const dest) +{ + if (!(reader->flags & SERD_READ_VARIABLES)) { + return r_err( + reader, SERD_ERR_BAD_SYNTAX, "syntax does not support variables\n"); + } + + if (!(*dest = push_node(reader, SERD_VARIABLE, "", 0))) { + return SERD_ERR_OVERFLOW; + } + + assert(peek_byte(reader) == '$' || peek_byte(reader) == '?'); + serd_byte_source_advance(reader->source); + + return read_VARNAME(reader, dest); +} + +static SerdStatus +read_verb(SerdReader* reader, SerdNode** dest) { const size_t orig_stack_size = reader->stack.size; - if (peek_byte(reader) == '<') { + + switch (peek_byte(reader)) { + case '$': + case '?': + return read_Var(reader, dest); + case '<': return read_IRIREF(reader, dest); } @@ -1138,6 +1181,9 @@ read_object(SerdReader* const reader, case '<': case '_': break; + case '$': + case '?': + break; default: return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected: ':', '<', or '_'\n"); } @@ -1147,6 +1193,10 @@ read_object(SerdReader* const reader, case EOF: case ')': return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected object\n"); + case '$': + case '?': + ret = read_Var(reader, &o); + break; case '[': simple = false; ret = 
read_anon(reader, *ctx, false, &o); @@ -1375,6 +1425,10 @@ read_subject(SerdReader* const reader, SerdStatus st = SERD_SUCCESS; bool ate_dot = false; switch ((*s_type = peek_byte(reader))) { + case '$': + case '?': + st = read_Var(reader, dest); + break; case '[': st = read_anon(reader, ctx, true, dest); break; @@ -1643,6 +1697,7 @@ read_n3_statement(SerdReader* const reader) if (s_type == '(' || (s_type == '[' && !*ctx.flags)) { return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid graph name\n"); } + ctx.graph = ctx.subject; ctx.subject = NULL; TRY(st, read_wrappedGraph(reader, &ctx)); @@ -1658,6 +1713,7 @@ read_n3_statement(SerdReader* const reader) } return st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX; + } else if (!ate_dot) { read_ws_star(reader); st = eat_byte_check(reader, '.'); @@ -1717,10 +1773,22 @@ read_nquadsDoc(SerdReader* const reader) return SERD_ERR_BAD_SYNTAX; } - // subject predicate object if ((st = read_subject(reader, ctx, &ctx.subject, &s_type)) || - !read_ws_star(reader) || (st = read_IRIREF(reader, &ctx.predicate)) || - !read_ws_star(reader) || + !read_ws_star(reader)) { + return st; + } + + switch (peek_byte(reader)) { + case '$': + case '?': + st = read_Var(reader, &ctx.predicate); + break; + case '<': + st = read_IRIREF(reader, &ctx.predicate); + break; + } + + if (st || !read_ws_star(reader) || (st = read_object(reader, &ctx, false, &ate_dot))) { return st; } @@ -1730,6 +1798,9 @@ read_nquadsDoc(SerdReader* const reader) switch (peek_byte(reader)) { case '.': break; + case '?': + TRY(st, read_Var(reader, &ctx.graph)); + break; case '_': TRY(st, read_BLANK_NODE_LABEL(reader, &ctx.graph, &ate_dot)); break; @@ -170,7 +170,8 @@ result(const SerdStatus status, const size_t count) SerdNode* serd_new_simple_node(const SerdNodeType type, const SerdStringView str) { - if (type != SERD_BLANK && type != SERD_CURIE && type != SERD_URI) { + if (type != SERD_BLANK && type != SERD_CURIE && type != SERD_URI && + type != SERD_VARIABLE) { return NULL; } diff 
--git a/src/reader.c b/src/reader.c index fe88ee14..d70cbb53 100644 --- a/src/reader.c +++ b/src/reader.c @@ -196,6 +196,7 @@ serd_reader_new(SerdWorld* const world, me->sink = sink; me->stack = serd_stack_new(stack_size, serd_node_align); me->syntax = syntax; + me->flags = flags; me->next_id = 1; me->strict = !(flags & SERD_READ_LAX); diff --git a/src/reader.h b/src/reader.h index 76f46506..b08c3fd0 100644 --- a/src/reader.h +++ b/src/reader.h @@ -53,6 +53,7 @@ struct SerdReaderImpl { SerdByteSource* source; SerdStack stack; SerdSyntax syntax; + SerdReaderFlags flags; unsigned next_id; uint8_t* buf; char* bprefix; diff --git a/src/serdi.c b/src/serdi.c index 3025b494..9a5495af 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -74,6 +74,7 @@ print_usage(const char* const name, const bool error) fprintf(os, " -t Write terser output without newlines.\n"); fprintf(os, " -v Display version information and exit.\n"); fprintf(os, " -w FILENAME Write output to FILENAME instead of stdout.\n"); + fprintf(os, " -x Support parsing variable nodes like `?x'.\n"); return error ? 
1 : 0; } @@ -190,6 +191,8 @@ main(int argc, char** argv) writer_flags |= SERD_WRITE_TERSE; } else if (opt == 'v') { return print_version(); + } else if (opt == 'x') { + reader_flags |= SERD_READ_VARIABLES; } else if (argv[a][1] == 'I') { if (++a == argc) { return missing_arg(prog, 'I'); diff --git a/src/statement.c b/src/statement.c index 51788e05..0ec6520a 100644 --- a/src/statement.c +++ b/src/statement.c @@ -28,7 +28,8 @@ is_resource(const SerdNode* const node) { const SerdNodeType type = serd_node_type(node); - return type == SERD_URI || type == SERD_CURIE || type == SERD_BLANK; + return type == SERD_URI || type == SERD_CURIE || type == SERD_BLANK || + type == SERD_VARIABLE; } bool diff --git a/src/writer.c b/src/writer.c index 2cee09c5..1ad62a18 100644 --- a/src/writer.c +++ b/src/writer.c @@ -899,6 +899,18 @@ write_blank(SerdWriter* const writer, } SERD_WARN_UNUSED_RESULT static SerdStatus +write_variable(SerdWriter* const writer, const SerdNode* const node) +{ + SerdStatus st = SERD_SUCCESS; + + TRY(st, esink("?", 1, writer)); + TRY(st, esink(serd_node_string(node), node->length, writer)); + + writer->last_sep = SEP_NONE; + return st; +} + +SERD_WARN_UNUSED_RESULT static SerdStatus write_node(SerdWriter* const writer, const SerdNode* const node, const SerdField field, @@ -919,6 +931,9 @@ write_node(SerdWriter* const writer, case SERD_BLANK: st = write_blank(writer, node, field, flags); break; + case SERD_VARIABLE: + st = write_variable(writer, node); + break; } return st; |