diff options
author | David Robillard <d@drobilla.net> | 2023-12-01 21:59:18 -0500 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:08 -0500 |
commit | 9b1139fe7045a0630e87501235af21803860b80c (patch) | |
tree | 3a7cd3ee639e0b7a6f2244177819f8843202c692 /src | |
parent | 94879f376f1d2b8fbb2322bf2a7dab5c3bb9e098 (diff) | |
download | serd-9b1139fe7045a0630e87501235af21803860b80c.tar.gz serd-9b1139fe7045a0630e87501235af21803860b80c.tar.bz2 serd-9b1139fe7045a0630e87501235af21803860b80c.zip |
[WIP] Add support for reading and writing variables
[WIP] Command line option, move later?
This adds a reader flag and serdi option for extending a syntax with support
for SPARQL-like variables, for storing things like patterns or simple queries.
Diffstat (limited to 'src')
-rw-r--r-- | src/env.c | 1 | ||||
-rw-r--r-- | src/read_ntriples.c | 52 | ||||
-rw-r--r-- | src/read_turtle.c | 17 | ||||
-rw-r--r-- | src/reader.c | 1 | ||||
-rw-r--r-- | src/reader.h | 1 | ||||
-rw-r--r-- | src/statement.c | 3 | ||||
-rw-r--r-- | src/writer.c | 15 |
7 files changed, 86 insertions, 4 deletions
@@ -319,6 +319,7 @@ serd_env_expand(const SerdEnv* env, const SerdNode* node) case SERD_CURIE: return expand_curie(env, node); case SERD_BLANK: + case SERD_VARIABLE: break; } diff --git a/src/read_ntriples.c b/src/read_ntriples.c index da726658..bec59c13 100644 --- a/src/read_ntriples.c +++ b/src/read_ntriples.c @@ -520,6 +520,51 @@ read_HEX(SerdReader* const reader) return 0; } +/** + Read a variable name, starting after the '?' or '$'. + + This is an extension that serd uses in certain contexts to support patterns. + + Restricted version of SPARQL 1.1: [166] VARNAME +*/ +static SerdStatus +read_VARNAME(SerdReader* const reader, SerdNode** const dest) +{ + // Simplified from SPARQL: VARNAME ::= (PN_CHARS_U | [0-9])+ + SerdNode* n = *dest; + SerdStatus st = SERD_SUCCESS; + int c = 0; + + while ((c = peek_byte(reader))) { + if (is_digit(c) || c == '_') { + st = push_byte(reader, n, eat_byte_safe(reader, c)); + } else if ((st = read_PN_CHARS(reader, n))) { + st = st > SERD_FAILURE ? st : SERD_SUCCESS; + break; + } + } + + return st; +} + +SerdStatus +read_Var(SerdReader* const reader, SerdNode** const dest) +{ + if (!(reader->flags & SERD_READ_VARIABLES)) { + return r_err(reader, SERD_BAD_SYNTAX, "syntax does not support variables"); + } + + const int c = peek_byte(reader); + assert(c == '$' || c == '?'); + skip_byte(reader, c); + + if (!(*dest = push_node(reader, SERD_VARIABLE, "", 0))) { + return SERD_BAD_STACK; + } + + return read_VARNAME(reader, dest); +} + // Nonterminals // comment ::= '#' ( [^#xA #xD] )* @@ -576,6 +621,8 @@ read_nt_subject(SerdReader* const reader, SerdNode** const dest) switch (peek_byte(reader)) { case '<': return read_IRI(reader, dest); + case '?': + return read_Var(reader, dest); case '_': return read_BLANK_NODE_LABEL(reader, dest, &ate_dot); default: @@ -589,7 +636,8 @@ read_nt_subject(SerdReader* const reader, SerdNode** const dest) SerdStatus read_nt_predicate(SerdReader* const reader, SerdNode** const dest) { - return read_IRI(reader, dest); + return (peek_byte(reader) == '?') ? read_Var(reader, dest) + : read_IRI(reader, dest); } /// [4] object @@ -605,6 +653,8 @@ read_nt_object(SerdReader* const reader, return read_literal(reader, dest); case '<': return read_IRI(reader, dest); + case '?': + return read_Var(reader, dest); case '_': return read_BLANK_NODE_LABEL(reader, dest, ate_dot); default: diff --git a/src/read_turtle.c b/src/read_turtle.c index 828818ba..027f0918 100644 --- a/src/read_turtle.c +++ b/src/read_turtle.c @@ -458,10 +458,15 @@ read_literal(SerdReader* const reader, } static SerdStatus -read_verb(SerdReader* const reader, SerdNode** const dest) +read_verb(SerdReader* reader, SerdNode** const dest) { const size_t orig_stack_size = reader->stack.size; - if (peek_byte(reader) == '<') { + + switch (peek_byte(reader)) { + case '$': + case '?': + return read_Var(reader, dest); + case '<': return read_IRIREF(reader, dest); } @@ -617,6 +622,10 @@ read_object(SerdReader* const reader, case EOF: case ')': return r_err(reader, SERD_BAD_SYNTAX, "expected object"); + case '$': + case '?': + st = read_Var(reader, &o); + break; case '[': simple = false; st = read_anon(reader, *ctx, false, &o); @@ -813,6 +822,10 @@ read_turtle_subject(SerdReader* const reader, SerdStatus st = SERD_SUCCESS; bool ate_dot = false; switch ((*s_type = peek_byte(reader))) { + case '$': + case '?': + st = read_Var(reader, dest); + break; case '[': st = read_anon(reader, ctx, true, dest); break; diff --git a/src/reader.c b/src/reader.c index bf6d697f..6f5ccce6 100644 --- a/src/reader.c +++ b/src/reader.c @@ -233,6 +233,7 @@ serd_reader_new(SerdWorld* const world, me->sink = sink; me->stack = serd_stack_new(stack_size, serd_node_align); me->syntax = syntax; + me->flags = flags; me->next_id = 1; me->strict = !(flags & SERD_READ_LAX); diff --git a/src/reader.h b/src/reader.h index 73647307..01ad5890 100644 --- a/src/reader.h +++ b/src/reader.h @@ -44,6 +44,7 @@ struct SerdReaderImpl { SerdByteSource* source; SerdStack stack; SerdSyntax syntax; + SerdReaderFlags flags; unsigned next_id; char* bprefix; size_t bprefix_len; diff --git a/src/statement.c b/src/statement.c index 434d41d4..9d2d4229 100644 --- a/src/statement.c +++ b/src/statement.c @@ -17,7 +17,8 @@ static bool is_resource(const SerdNode* const node) { const SerdNodeType type = node ? serd_node_type(node) : (SerdNodeType)0; - return type == SERD_URI || type == SERD_CURIE || type == SERD_BLANK; + return type == SERD_URI || type == SERD_CURIE || type == SERD_BLANK || + type == SERD_VARIABLE; } bool diff --git a/src/writer.c b/src/writer.c index 60a387c0..372e4279 100644 --- a/src/writer.c +++ b/src/writer.c @@ -943,6 +943,18 @@ write_blank(SerdWriter* const writer, } SERD_NODISCARD static SerdStatus +write_variable(SerdWriter* const writer, const SerdNode* const node) +{ + SerdStatus st = SERD_SUCCESS; + + TRY(st, esink("?", 1, writer)); + TRY(st, esink(serd_node_string(node), node->length, writer)); + + writer->last_sep = SEP_NONE; + return st; +} + +SERD_NODISCARD static SerdStatus write_node(SerdWriter* const writer, const SerdNode* const node, const SerdField field, @@ -963,6 +975,9 @@ write_node(SerdWriter* const writer, case SERD_BLANK: st = write_blank(writer, node, field, flags); break; + case SERD_VARIABLE: + st = write_variable(writer, node); + break; } if (node->type != SERD_BLANK) { |