diff options
30 files changed, 271 insertions, 23 deletions
@@ -2,6 +2,7 @@ serd (1.1.1) unstable; urgency=medium * Add SerdBuffer for mutable buffers to keep SerdChunk const-correct * Add SerdWorld for shared library state + * Add support for parsing variables * Add support for writing terse output with minimal newlines * Add support for xsd:float and xsd:double literals * Bring read/write interface closer to C standard @@ -19,7 +20,7 @@ serd (1.1.1) unstable; urgency=medium * Use a fixed-size reader stack * Use char* for strings in public API - -- David Robillard <d@drobilla.net> Mon, 19 Dec 2022 20:54:56 +0000 + -- David Robillard <d@drobilla.net> Mon, 19 Dec 2022 20:55:34 +0000 serd (0.32.0) stable; urgency=medium diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1 index 9b700068..f5908c1d 100644 --- a/doc/man/serd-pipe.1 +++ b/doc/man/serd-pipe.1 @@ -8,7 +8,7 @@ .Nd read and write RDF data .Sh SYNOPSIS .Nm serd-pipe -.Op Fl afhlqtv +.Op Fl afhlqtvx .Op Fl B Ar base .Op Fl b Ar bytes .Op Fl c Ar prefix @@ -158,6 +158,12 @@ as input. Write terser output without newlines. .It Fl v Display version information and exit. +.It Fl x +Support parsing variable nodes. +Variables can be written in SPARQL style, for example +.Dq ?var +or +.Dq $var . .El .Sh EXIT STATUS .Nm diff --git a/include/serd/node.h b/include/serd/node.h index b308a507..1faba160 100644 --- a/include/serd/node.h +++ b/include/serd/node.h @@ -38,9 +38,14 @@ typedef struct SerdNodeImpl SerdNode; /** Type of a node. - An abstract RDF node can be either a resource or a literal. This type is - more precise to preserve syntactic differences and support additional - features. + Note that this set of types is both more precise than, and extended from, + the possible types of an abstract RDF node. Not all types can occur in all + contexts, for example, a Turtle document can't contain a variable node. + + The string value of a node never contains quoting or other type indicators. + For example, the blank node `_:id3` and the plain literal `"id3"` from a + Turtle document would both have the same string, "id3", returned by + #serd_node_string. */ typedef enum { /** @@ -82,6 +87,19 @@ typedef enum { Turtle](http://www.w3.org/TR/turtle/#grammar-production-BLANK_NODE_LABEL) */ SERD_BLANK = 4, + + /** + A variable node. + + A variable's identity is, like blank nodes, local to its context. + Variables are typically used in interfaces for querying, rather than + present in data (there is no concept of a variable in RDF or its standard + syntaxes). + + @see [SPARQL 1.1 Query + Language](https://www.w3.org/TR/sparql11-query/#rVar) + */ + SERD_VARIABLE = 5, } SerdNodeType; /// Node flags, which ORed together make a #SerdNodeFlags diff --git a/include/serd/reader.h b/include/serd/reader.h index 4c669342..040f3398 100644 --- a/include/serd/reader.h +++ b/include/serd/reader.h @@ -29,7 +29,8 @@ typedef struct SerdReaderImpl SerdReader; /// Reader options typedef enum { - SERD_READ_LAX = 1U << 0U, ///< Tolerate invalid input where possible + SERD_READ_LAX = 1U << 0U, ///< Tolerate invalid input where possible + SERD_READ_VARIABLES = 1U << 1U, ///< Support variable nodes } SerdReaderFlag; /// Bitwise OR of SerdReaderFlag values @@ -319,6 +319,7 @@ serd_env_expand(const SerdEnv* env, const SerdNode* node) case SERD_CURIE: return expand_curie(env, node); case SERD_BLANK: + case SERD_VARIABLE: break; } diff --git a/src/read_ntriples.c b/src/read_ntriples.c index da726658..bec59c13 100644 --- a/src/read_ntriples.c +++ b/src/read_ntriples.c @@ -520,6 +520,51 @@ read_HEX(SerdReader* const reader) return 0; } +/** + Read a variable name, starting after the '?' or '$'. + + This is an extension that serd uses in certain contexts to support patterns. + + Restricted version of SPARQL 1.1: [166] VARNAME +*/ +static SerdStatus +read_VARNAME(SerdReader* const reader, SerdNode** const dest) +{ + // Simplified from SPARQL: VARNAME ::= (PN_CHARS_U | [0-9])+ + SerdNode* n = *dest; + SerdStatus st = SERD_SUCCESS; + int c = 0; + + while ((c = peek_byte(reader))) { + if (is_digit(c) || c == '_') { + st = push_byte(reader, n, eat_byte_safe(reader, c)); + } else if ((st = read_PN_CHARS(reader, n))) { + st = st > SERD_FAILURE ? st : SERD_SUCCESS; + break; + } + } + + return st; +} + +SerdStatus +read_Var(SerdReader* const reader, SerdNode** const dest) +{ + if (!(reader->flags & SERD_READ_VARIABLES)) { + return r_err(reader, SERD_BAD_SYNTAX, "syntax does not support variables"); + } + + const int c = peek_byte(reader); + assert(c == '$' || c == '?'); + skip_byte(reader, c); + + if (!(*dest = push_node(reader, SERD_VARIABLE, "", 0))) { + return SERD_BAD_STACK; + } + + return read_VARNAME(reader, dest); +} + // Nonterminals // comment ::= '#' ( [^#xA #xD] )* @@ -576,6 +621,8 @@ read_nt_subject(SerdReader* const reader, SerdNode** const dest) switch (peek_byte(reader)) { case '<': return read_IRI(reader, dest); + case '?': + return read_Var(reader, dest); case '_': return read_BLANK_NODE_LABEL(reader, dest, &ate_dot); default: @@ -589,7 +636,8 @@ read_nt_subject(SerdReader* const reader, SerdNode** const dest) SerdStatus read_nt_predicate(SerdReader* const reader, SerdNode** const dest) { - return read_IRI(reader, dest); + return (peek_byte(reader) == '?') ? read_Var(reader, dest) + : read_IRI(reader, dest); } /// [4] object @@ -605,6 +653,8 @@ read_nt_object(SerdReader* const reader, return read_literal(reader, dest); case '<': return read_IRI(reader, dest); + case '?': + return read_Var(reader, dest); case '_': return read_BLANK_NODE_LABEL(reader, dest, ate_dot); default: diff --git a/src/read_turtle.c b/src/read_turtle.c index 828818ba..027f0918 100644 --- a/src/read_turtle.c +++ b/src/read_turtle.c @@ -458,10 +458,15 @@ read_literal(SerdReader* const reader, } static SerdStatus -read_verb(SerdReader* const reader, SerdNode** const dest) +read_verb(SerdReader* reader, SerdNode** const dest) { const size_t orig_stack_size = reader->stack.size; - if (peek_byte(reader) == '<') { + + switch (peek_byte(reader)) { + case '$': + case '?': + return read_Var(reader, dest); + case '<': return read_IRIREF(reader, dest); } @@ -617,6 +622,10 @@ read_object(SerdReader* const reader, case EOF: case ')': return r_err(reader, SERD_BAD_SYNTAX, "expected object"); + case '$': + case '?': + st = read_Var(reader, &o); + break; case '[': simple = false; st = read_anon(reader, *ctx, false, &o); @@ -813,6 +822,10 @@ read_turtle_subject(SerdReader* const reader, SerdStatus st = SERD_SUCCESS; bool ate_dot = false; switch ((*s_type = peek_byte(reader))) { + case '$': + case '?': + st = read_Var(reader, dest); + break; case '[': st = read_anon(reader, ctx, true, dest); break; diff --git a/src/reader.c b/src/reader.c index bf6d697f..6f5ccce6 100644 --- a/src/reader.c +++ b/src/reader.c @@ -233,6 +233,7 @@ serd_reader_new(SerdWorld* const world, me->sink = sink; me->stack = serd_stack_new(stack_size, serd_node_align); me->syntax = syntax; + me->flags = flags; me->next_id = 1; me->strict = !(flags & SERD_READ_LAX); diff --git a/src/reader.h b/src/reader.h index 73647307..01ad5890 100644 --- a/src/reader.h +++ b/src/reader.h @@ -44,6 +44,7 @@ struct SerdReaderImpl { SerdByteSource* source; SerdStack stack; SerdSyntax syntax; + SerdReaderFlags flags; unsigned next_id; char* bprefix; size_t bprefix_len; diff --git a/src/statement.c b/src/statement.c index 434d41d4..9d2d4229 100644 --- a/src/statement.c +++ b/src/statement.c @@ -17,7 +17,8 @@ static bool is_resource(const SerdNode* const node) { const SerdNodeType type = node ? serd_node_type(node) : (SerdNodeType)0; - return type == SERD_URI || type == SERD_CURIE || type == SERD_BLANK; + return type == SERD_URI || type == SERD_CURIE || type == SERD_BLANK || + type == SERD_VARIABLE; } bool diff --git a/src/writer.c b/src/writer.c index 60a387c0..372e4279 100644 --- a/src/writer.c +++ b/src/writer.c @@ -943,6 +943,18 @@ write_blank(SerdWriter* const writer, } SERD_NODISCARD static SerdStatus +write_variable(SerdWriter* const writer, const SerdNode* const node) +{ + SerdStatus st = SERD_SUCCESS; + + TRY(st, esink("?", 1, writer)); + TRY(st, esink(serd_node_string(node), node->length, writer)); + + writer->last_sep = SEP_NONE; + return st; +} + +SERD_NODISCARD static SerdStatus write_node(SerdWriter* const writer, const SerdNode* const node, const SerdField field, @@ -963,6 +975,9 @@ write_node(SerdWriter* const writer, case SERD_BLANK: st = write_blank(writer, node, field, flags); break; + case SERD_VARIABLE: + st = write_variable(writer, node); + break; } if (node->type != SERD_BLANK) { diff --git a/test/extra/bad/bad-var.ttl b/test/extra/bad/bad-var.ttl new file mode 100644 index 00000000..29b5b008 --- /dev/null +++ b/test/extra/bad/bad-var.ttl @@ -0,0 +1,2 @@ +?s + <http://example.org/p> <http://example.org/o> . diff --git a/test/extra/bad/manifest.ttl b/test/extra/bad/manifest.ttl index 7ab5a427..cd4aee24 100644 --- a/test/extra/bad/manifest.ttl +++ b/test/extra/bad/manifest.ttl @@ -87,6 +87,7 @@ <#bad-uri-scheme> <#bad-uri-scheme-start> <#bad-uri-truncated> + <#bad-var> <#bad-verb> ) . @@ -490,6 +491,11 @@ mf:action <bad-uri-truncated.nt> ; mf:name "bad-uri-truncated" . +<#bad-var> + a rdft:TestTurtleNegativeSyntax ; + mf:action <bad-var.ttl> ; + mf:name "bad-var" . + <#bad-verb> a rdft:TestTurtleNegativeSyntax ; mf:action <bad-verb.ttl> ; diff --git a/test/extra/pattern/bad-pattern-graph.nq b/test/extra/pattern/bad-pattern-graph.nq new file mode 100644 index 00000000..9c77ac9a --- /dev/null +++ b/test/extra/pattern/bad-pattern-graph.nq @@ -0,0 +1 @@ +<http://example.org/s> <http://example.org/p> <http://example.org/o> ?gµ . diff --git a/test/extra/pattern/bad-pattern-object.nq b/test/extra/pattern/bad-pattern-object.nq new file mode 100644 index 00000000..7f22b520 --- /dev/null +++ b/test/extra/pattern/bad-pattern-object.nq @@ -0,0 +1 @@ +<http://example.org/s> <http://example.org/p> ?oµ <http://example.org/g> . diff --git a/test/extra/pattern/bad-pattern-object.nt b/test/extra/pattern/bad-pattern-object.nt new file mode 100644 index 00000000..aef057e4 --- /dev/null +++ b/test/extra/pattern/bad-pattern-object.nt @@ -0,0 +1 @@ +<http://example.org/s> <http://example.org/p> ?oµ . diff --git a/test/extra/pattern/bad-pattern-object.ttl b/test/extra/pattern/bad-pattern-object.ttl new file mode 100644 index 00000000..cc2eee77 --- /dev/null +++ b/test/extra/pattern/bad-pattern-object.ttl @@ -0,0 +1,2 @@ +<http://example.org/s> + <http://example.org/o> ?oµ . diff --git a/test/extra/pattern/bad-pattern-predicate.nq b/test/extra/pattern/bad-pattern-predicate.nq new file mode 100644 index 00000000..cfe466c9 --- /dev/null +++ b/test/extra/pattern/bad-pattern-predicate.nq @@ -0,0 +1 @@ +<http://example.org/s> ?pµ <http://example.org/o> <http://example.org/g> . diff --git a/test/extra/pattern/bad-pattern-predicate.nt b/test/extra/pattern/bad-pattern-predicate.nt new file mode 100644 index 00000000..a7e0c994 --- /dev/null +++ b/test/extra/pattern/bad-pattern-predicate.nt @@ -0,0 +1 @@ +<http://example.org/s> ?pµ <http://example.org/o> . diff --git a/test/extra/pattern/bad-pattern-predicate.ttl b/test/extra/pattern/bad-pattern-predicate.ttl new file mode 100644 index 00000000..cc2bd6b1 --- /dev/null +++ b/test/extra/pattern/bad-pattern-predicate.ttl @@ -0,0 +1,2 @@ +<http://example.org/s> + ?pµ <http://example.org/o> . diff --git a/test/extra/pattern/bad-pattern-subject.nq b/test/extra/pattern/bad-pattern-subject.nq new file mode 100644 index 00000000..341c437f --- /dev/null +++ b/test/extra/pattern/bad-pattern-subject.nq @@ -0,0 +1 @@ +?sµ <http://example.org/p> <http://example.org/o> <http://example.org/g> . diff --git a/test/extra/pattern/bad-pattern-subject.nt b/test/extra/pattern/bad-pattern-subject.nt new file mode 100644 index 00000000..5bbb29ff --- /dev/null +++ b/test/extra/pattern/bad-pattern-subject.nt @@ -0,0 +1 @@ +?sµ <http://example.org/p> <http://example.org/o> . diff --git a/test/extra/pattern/bad-pattern-subject.ttl b/test/extra/pattern/bad-pattern-subject.ttl new file mode 100644 index 00000000..5f3dbfdd --- /dev/null +++ b/test/extra/pattern/bad-pattern-subject.ttl @@ -0,0 +1,2 @@ +?sµ + <http://example.org/p> <http://example.org/o> . diff --git a/test/extra/pattern/manifest.ttl b/test/extra/pattern/manifest.ttl new file mode 100644 index 00000000..e5690218 --- /dev/null +++ b/test/extra/pattern/manifest.ttl @@ -0,0 +1,88 @@ +@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix rdft: <http://www.w3.org/ns/rdftest#> . + +<> + a mf:Manifest ; + rdfs:comment "Serd pattern syntax test suite" ; + mf:entries ( + <#bad-pattern-graph-nq> + <#bad-pattern-object-nq> + <#bad-pattern-object-nt> + <#bad-pattern-object-ttl> + <#bad-pattern-predicate-nq> + <#bad-pattern-predicate-nt> + <#bad-pattern-predicate-ttl> + <#bad-pattern-subject-nq> + <#bad-pattern-subject-nt> + <#bad-pattern-subject-ttl> + <#test-graph-pattern> + <#test-pattern-nt> + <#test-pattern-ttl> + ) . + +<#bad-pattern-graph-nq> + a rdft:TestNQuadsNegativeSyntax ; + mf:action <bad-pattern-graph.nq> ; + mf:name "bad-pattern-graph-nq" . + +<#bad-pattern-object-nq> + a rdft:TestNQuadsNegativeSyntax ; + mf:action <bad-pattern-object.nq> ; + mf:name "bad-pattern-object-nq" . + +<#bad-pattern-object-nt> + a rdft:TestNTriplesNegativeSyntax ; + mf:action <bad-pattern-object.nt> ; + mf:name "bad-pattern-object-nt" . + +<#bad-pattern-object-ttl> + a rdft:TestTurtleNegativeSyntax ; + mf:action <bad-pattern-object.ttl> ; + mf:name "bad-pattern-object-ttl" . + +<#bad-pattern-predicate-nq> + a rdft:TestNQuadsNegativeSyntax ; + mf:action <bad-pattern-predicate.nq> ; + mf:name "bad-pattern-predicate-nq" . + +<#bad-pattern-predicate-nt> + a rdft:TestNTriplesNegativeSyntax ; + mf:action <bad-pattern-predicate.nt> ; + mf:name "bad-pattern-predicate-nt" . + +<#bad-pattern-predicate-ttl> + a rdft:TestTurtleNegativeSyntax ; + mf:action <bad-pattern-predicate.ttl> ; + mf:name "bad-pattern-predicate-ttl" . + +<#bad-pattern-subject-nq> + a rdft:TestNQuadsNegativeSyntax ; + mf:action <bad-pattern-subject.nq> ; + mf:name "bad-pattern-subject-nq" . + +<#bad-pattern-subject-nt> + a rdft:TestNTriplesNegativeSyntax ; + mf:action <bad-pattern-subject.nt> ; + mf:name "bad-pattern-subject-nt" . + +<#bad-pattern-subject-ttl> + a rdft:TestTurtleNegativeSyntax ; + mf:action <bad-pattern-subject.ttl> ; + mf:name "bad-pattern-subject-ttl" . + +<#test-graph-pattern> + a rdft:TestNQuadsPositiveSyntax ; + mf:action <test-graph-pattern.nq> ; + mf:name "test-graph-pattern" . + +<#test-pattern-nt> + a rdft:TestNTriplesPositiveSyntax ; + mf:action <test-pattern.nt> ; + mf:name "test-pattern-nt" . + +<#test-pattern-ttl> + a rdft:TestTurtleEval ; + mf:action <test-pattern.ttl> ; + mf:name "test-pattern" ; + mf:result <test-pattern.nt> . diff --git a/test/extra/pattern/test-graph-pattern.nq b/test/extra/pattern/test-graph-pattern.nq new file mode 100644 index 00000000..453e9516 --- /dev/null +++ b/test/extra/pattern/test-graph-pattern.nq @@ -0,0 +1 @@ +<http://a.example/s> <http://a.example/p> <http://a.example/o> ?g . diff --git a/test/extra/pattern/test-pattern.nt b/test/extra/pattern/test-pattern.nt new file mode 100644 index 00000000..ddfe6d3c --- /dev/null +++ b/test/extra/pattern/test-pattern.nt @@ -0,0 +1,6 @@ +?s <http://example.org/p1> <http://example.org/o1> . +<http://example.org/s> ?p <http://example.org/o1> . +<http://example.org/s> <http://example.org/p1> ?o . +<http://example.org/s> <http://example.org/p2> _:b1 . +_:b1 ?2p <http://example.org/o2> . +_:b1 <http://example.org/p3> ?_o . diff --git a/test/extra/pattern/test-pattern.ttl b/test/extra/pattern/test-pattern.ttl new file mode 100644 index 00000000..3742e5ed --- /dev/null +++ b/test/extra/pattern/test-pattern.ttl @@ -0,0 +1,10 @@ +?s + <http://example.org/p1> <http://example.org/o1> . + +<http://example.org/s> + ?p <http://example.org/o1> ; + <http://example.org/p1> ?o ; + <http://example.org/p2> [ + ?2p <http://example.org/o2> ; + <http://example.org/p3> ?_o + ] . diff --git a/test/meson.build b/test/meson.build index e6b0fe5c..a2436b22 100644 --- a/test/meson.build +++ b/test/meson.build @@ -32,6 +32,7 @@ ttl_metadata_file_paths = [ 'extra/full/manifest.ttl', 'extra/good/manifest.ttl', 'extra/lax/manifest.ttl', + 'extra/pattern/manifest.ttl', 'extra/perfect/manifest.ttl', 'extra/prefix/manifest.ttl', 'extra/pretty/manifest.ttl', @@ -430,6 +431,12 @@ test_suites = { files('extra/lax/manifest.ttl'), ns_serdtest + 'lax/', ], + 'pattern': [ + files('extra/pattern/manifest.ttl'), + ns_serdtest + 'pattern/', + '--', + '-x', + ], 'perfect_forward': [ files('extra/perfect/manifest.ttl'), ns_serdtest + 'perfect/', diff --git a/test/test_overflow.c b/test/test_overflow.c index 086d353b..b81d4367 100644 --- a/test/test_overflow.c +++ b/test/test_overflow.c @@ -12,17 +12,18 @@ static const size_t min_stack_size = 4U * sizeof(size_t) + 240U; static const size_t max_stack_size = 1024U; static SerdStatus -test_size(SerdWorld* const world, - const char* const str, - const SerdSyntax syntax, - const size_t stack_size) +test_size(SerdWorld* const world, + const char* const str, + const SerdSyntax syntax, + const SerdReaderFlags flags, + const size_t stack_size) { SerdLimits limits = serd_world_limits(world); limits.reader_stack_size = stack_size; serd_world_set_limits(world, limits); SerdSink* sink = serd_sink_new(NULL, NULL, NULL); - SerdReader* const reader = serd_reader_new(world, syntax, 0U, sink); + SerdReader* const reader = serd_reader_new(world, syntax, flags, sink); if (!reader) { return SERD_BAD_STACK; } @@ -43,17 +44,18 @@ test_size(SerdWorld* const world, } static void -test_all_sizes(SerdWorld* const world, - const char* const str, - const SerdSyntax syntax) +test_all_sizes(SerdWorld* const world, + const char* const str, + const SerdSyntax syntax, + const SerdReaderFlags flags) { // Ensure reading with the maximum stack size succeeds - SerdStatus st = test_size(world, str, syntax, max_stack_size); + SerdStatus st = test_size(world, str, syntax, flags, max_stack_size); assert(!st); // Test with an increasingly smaller stack for (size_t size = max_stack_size; size > min_stack_size; --size) { - if ((st = test_size(world, str, syntax, size))) { + if ((st = test_size(world, str, syntax, flags, size))) { assert(st == SERD_BAD_STACK); } } @@ -75,7 +77,7 @@ test_ntriples_overflow(void) SerdWorld* const world = serd_world_new(); for (const char* const* t = test_strings; *t; ++t) { - test_all_sizes(world, *t, SERD_NTRIPLES); + test_all_sizes(world, *t, SERD_NTRIPLES, 0U); } serd_world_free(world); @@ -97,6 +99,7 @@ test_turtle_overflow(void) "<http://example.org/s> <http://example.org/p> _:blank .", "<http://example.org/s> <http://example.org/p> true .", "<http://example.org/s> <http://example.org/p> \"\"@en .", + "?subject ?predicate ?object .", "(((((((((42))))))))) <http://example.org/p> <http://example.org/o> .", "@prefix eg: <http://example.org/ns/test> .", "@base <http://example.org/base> .", @@ -164,7 +167,7 @@ test_turtle_overflow(void) SerdWorld* const world = serd_world_new(); for (const char* const* t = test_strings; *t; ++t) { - test_all_sizes(world, *t, SERD_TURTLE); + test_all_sizes(world, *t, SERD_TURTLE, SERD_READ_VARIABLES); } serd_world_free(world); diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c index 480afa6e..b09c12e1 100644 --- a/tools/serd-pipe.c +++ b/tools/serd-pipe.c @@ -51,7 +51,8 @@ print_usage(const char* const name, const bool error) " -r ROOT_URI Keep relative URIs within ROOT_URI.\n" " -s STRING Parse STRING as input.\n" " -t Write terser output without newlines.\n" - " -v Display version information and exit.\n"; + " -v Display version information and exit.\n" + " -x Support parsing variable nodes like \"?x\".\n"; FILE* const os = error ? stderr : stdout; fprintf(os, "%s", error ? "\n" : ""); @@ -161,6 +162,9 @@ main(int argc, char** argv) writer_flags |= SERD_WRITE_TERSE; } else if (opt == 'v') { return serd_print_version(argv[0]); + } else if (opt == 'x') { + reader_flags |= SERD_READ_VARIABLES; + break; } else if (argv[a][1] == 'B') { if (++a == argc) { return missing_arg(prog, 'B'); |