diff options
29 files changed, 275 insertions, 24 deletions
@@ -3,6 +3,7 @@ serd (1.0.1) unstable; * Add SerdBuffer for mutable buffers to keep SerdChunk const-correct * Add SerdWorld for shared library state * Add option for writing terse output without newlines + * Add support for parsing variables * Add support for writing terse collections * Add support for xsd:float and xsd:double literals * Bring read/write interface closer to C standard @@ -17,7 +18,7 @@ serd (1.0.1) unstable; * Use a fixed-size reader stack * Use char* for strings in public API - -- David Robillard <d@drobilla.net> Wed, 13 Jan 2021 13:29:44 +0000 + -- David Robillard <d@drobilla.net> Wed, 13 Jan 2021 13:37:55 +0000 serd (0.30.11) unstable; diff --git a/doc/serdi.1 b/doc/serdi.1 index f9c98492..495c3940 100644 --- a/doc/serdi.1 +++ b/doc/serdi.1 @@ -6,7 +6,7 @@ .Nd read and write RDF syntax .Sh SYNOPSIS .Nm serdi -.Op Fl abefhlqtv +.Op Fl abefhlqtvx .Op Fl I Ar base .Op Fl c Ar prefix .Op Fl i Ar syntax @@ -139,6 +139,13 @@ Display version information and exit. Write output to the given .Ar filename instead of stdout. +.Pp +.It Fl x +Support parsing variable nodes. +Variables can be written in SPARQL style, for example +.Dq ?var +or +.Dq $var . .El .Sh EXIT STATUS .Nm diff --git a/include/serd/serd.h b/include/serd/serd.h index 82292389..a98b2b8a 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -535,7 +535,8 @@ typedef struct SerdNodeImpl SerdNode; An RDF node, in the abstract sense, can be either a resource, literal, or a blank. This type is more precise, because syntactically there are two ways - to refer to a resource (by URI or CURIE). + to refer to a resource (by URI or CURIE). Serd also has support for + variable nodes to support some features, which are not RDF nodes. There are also two ways to refer to a blank node in syntax (by ID or anonymously), but this is handled by statement flags rather than distinct @@ -574,7 +575,16 @@ typedef enum { is meaningful only within this serialisation. @see [RDF 1.1 Turtle](http://www.w3.org/TR/turtle/#grammar-production-BLANK_NODE_LABEL) */ - SERD_BLANK = 4 + SERD_BLANK = 4, + + /** + A variable node. + + Value is a variable name without any syntactic prefix, like "name", + which is meaningful only within this serialisation. @see [SPARQL 1.1 + Query Language](https://www.w3.org/TR/sparql11-query/#rVar) + */ + SERD_VARIABLE = 5 } SerdNodeType; /** @@ -1556,7 +1566,8 @@ typedef struct SerdReaderImpl SerdReader; /// Reader options typedef enum { - SERD_READ_LAX = 1u << 0u ///< Tolerate invalid input where possible + SERD_READ_LAX = 1u << 0u, ///< Tolerate invalid input where possible + SERD_READ_VARIABLES = 1u << 1u ///< Support variable nodes } SerdReaderFlag; /// Bitwise OR of SerdReaderFlag values @@ -322,6 +322,7 @@ serd_env_expand(const SerdEnv* env, const SerdNode* node) case SERD_CURIE: return expand_curie(env, node); case SERD_BLANK: + case SERD_VARIABLE: break; } @@ -963,10 +963,53 @@ read_literal(SerdReader* const reader, } static SerdStatus -read_verb(SerdReader* const reader, SerdNode** const dest) +read_VARNAME(SerdReader* const reader, SerdNode** const dest) +{ + // Simplified from SPARQL: VARNAME ::= (PN_CHARS_U | [0-9])+ + SerdNode* n = *dest; + SerdStatus st = SERD_SUCCESS; + int c = 0; + peek_byte(reader); + while ((c = peek_byte(reader))) { + if (is_digit(c) || c == '_') { + st = push_byte(reader, n, eat_byte_safe(reader, c)); + } else if ((st = read_PN_CHARS(reader, n))) { + st = st > SERD_FAILURE ? st : SERD_SUCCESS; + break; + } + } + + return st; +} + +static SerdStatus +read_Var(SerdReader* const reader, SerdNode** const dest) +{ + if (!(reader->flags & SERD_READ_VARIABLES)) { + return r_err( + reader, SERD_ERR_BAD_SYNTAX, "syntax does not support variables\n"); + } + + if (!(*dest = push_node(reader, SERD_VARIABLE, "", 0))) { + return SERD_ERR_OVERFLOW; + } + + assert(peek_byte(reader) == '$' || peek_byte(reader) == '?'); + serd_byte_source_advance(reader->source); + + return read_VARNAME(reader, dest); +} + +static SerdStatus +read_verb(SerdReader* reader, SerdNode** dest) { const size_t orig_stack_size = reader->stack.size; - if (peek_byte(reader) == '<') { + + switch (peek_byte(reader)) { + case '$': + case '?': + return read_Var(reader, dest); + case '<': return read_IRIREF(reader, dest); } @@ -1138,6 +1181,9 @@ read_object(SerdReader* const reader, case '<': case '_': break; + case '$': + case '?': + break; default: return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected: ':', '<', or '_'\n"); } @@ -1147,6 +1193,10 @@ read_object(SerdReader* const reader, case EOF: case ')': return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected object\n"); + case '$': + case '?': + ret = read_Var(reader, &o); + break; case '[': simple = false; ret = read_anon(reader, *ctx, false, &o); @@ -1375,6 +1425,10 @@ read_subject(SerdReader* const reader, SerdStatus st = SERD_SUCCESS; bool ate_dot = false; switch ((*s_type = peek_byte(reader))) { + case '$': + case '?': + st = read_Var(reader, dest); + break; case '[': st = read_anon(reader, ctx, true, dest); break; @@ -1643,6 +1697,7 @@ read_n3_statement(SerdReader* const reader) if (s_type == '(' || (s_type == '[' && !*ctx.flags)) { return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid graph name\n"); } + ctx.graph = ctx.subject; ctx.subject = NULL; TRY(st, read_wrappedGraph(reader, &ctx)); @@ -1658,6 +1713,7 @@ read_n3_statement(SerdReader* const reader) } return st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX; + } else if (!ate_dot) { read_ws_star(reader); st = eat_byte_check(reader, '.'); @@ -1717,10 +1773,22 @@ read_nquadsDoc(SerdReader* const reader) return SERD_ERR_BAD_SYNTAX; } - // subject predicate object if ((st = read_subject(reader, ctx, &ctx.subject, &s_type)) || - !read_ws_star(reader) || (st = read_IRIREF(reader, &ctx.predicate)) || - !read_ws_star(reader) || + !read_ws_star(reader)) { + return st; + } + + switch (peek_byte(reader)) { + case '$': + case '?': + st = read_Var(reader, &ctx.predicate); + break; + case '<': + st = read_IRIREF(reader, &ctx.predicate); + break; + } + + if (st || !read_ws_star(reader) || (st = read_object(reader, &ctx, false, &ate_dot))) { return st; } @@ -1730,6 +1798,9 @@ read_nquadsDoc(SerdReader* const reader) switch (peek_byte(reader)) { case '.': break; + case '?': + TRY(st, read_Var(reader, &ctx.graph)); + break; case '_': TRY(st, read_BLANK_NODE_LABEL(reader, &ctx.graph, &ate_dot)); break; @@ -170,7 +170,8 @@ result(const SerdStatus status, const size_t count) SerdNode* serd_new_simple_node(const SerdNodeType type, const SerdStringView str) { - if (type != SERD_BLANK && type != SERD_CURIE && type != SERD_URI) { + if (type != SERD_BLANK && type != SERD_CURIE && type != SERD_URI && + type != SERD_VARIABLE) { return NULL; } diff --git a/src/reader.c b/src/reader.c index fe88ee14..d70cbb53 100644 --- a/src/reader.c +++ b/src/reader.c @@ -196,6 +196,7 @@ serd_reader_new(SerdWorld* const world, me->sink = sink; me->stack = serd_stack_new(stack_size, serd_node_align); me->syntax = syntax; + me->flags = flags; me->next_id = 1; me->strict = !(flags & SERD_READ_LAX); diff --git a/src/reader.h b/src/reader.h index 76f46506..b08c3fd0 100644 --- a/src/reader.h +++ b/src/reader.h @@ -53,6 +53,7 @@ struct SerdReaderImpl { SerdByteSource* source; SerdStack stack; SerdSyntax syntax; + SerdReaderFlags flags; unsigned next_id; uint8_t* buf; char* bprefix; diff --git a/src/serdi.c b/src/serdi.c index 3025b494..9a5495af 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -74,6 +74,7 @@ print_usage(const char* const name, const bool error) fprintf(os, " -t Write terser output without newlines.\n"); fprintf(os, " -v Display version information and exit.\n"); fprintf(os, " -w FILENAME Write output to FILENAME instead of stdout.\n"); + fprintf(os, " -x Support parsing variable nodes like `?x'.\n"); return error ? 1 : 0; } @@ -190,6 +191,8 @@ main(int argc, char** argv) writer_flags |= SERD_WRITE_TERSE; } else if (opt == 'v') { return print_version(); + } else if (opt == 'x') { + reader_flags |= SERD_READ_VARIABLES; } else if (argv[a][1] == 'I') { if (++a == argc) { return missing_arg(prog, 'I'); diff --git a/src/statement.c b/src/statement.c index 51788e05..0ec6520a 100644 --- a/src/statement.c +++ b/src/statement.c @@ -28,7 +28,8 @@ is_resource(const SerdNode* const node) { const SerdNodeType type = serd_node_type(node); - return type == SERD_URI || type == SERD_CURIE || type == SERD_BLANK; + return type == SERD_URI || type == SERD_CURIE || type == SERD_BLANK || + type == SERD_VARIABLE; } bool diff --git a/src/writer.c b/src/writer.c index 2cee09c5..1ad62a18 100644 --- a/src/writer.c +++ b/src/writer.c @@ -899,6 +899,18 @@ write_blank(SerdWriter* const writer, } SERD_WARN_UNUSED_RESULT static SerdStatus +write_variable(SerdWriter* const writer, const SerdNode* const node) +{ + SerdStatus st = SERD_SUCCESS; + + TRY(st, esink("?", 1, writer)); + TRY(st, esink(serd_node_string(node), node->length, writer)); + + writer->last_sep = SEP_NONE; + return st; +} + +SERD_WARN_UNUSED_RESULT static SerdStatus write_node(SerdWriter* const writer, const SerdNode* const node, const SerdField field, @@ -919,6 +931,9 @@ write_node(SerdWriter* const writer, case SERD_BLANK: st = write_blank(writer, node, field, flags); break; + case SERD_VARIABLE: + st = write_variable(writer, node); + break; } return st; diff --git a/test/bad/bad-var.ttl b/test/bad/bad-var.ttl new file mode 100644 index 00000000..29b5b008 --- /dev/null +++ b/test/bad/bad-var.ttl @@ -0,0 +1,2 @@ +?s + <http://example.org/p> <http://example.org/o> . diff --git a/test/bad/manifest.ttl b/test/bad/manifest.ttl index 96a0b593..466a4435 100644 --- a/test/bad/manifest.ttl +++ b/test/bad/manifest.ttl @@ -70,6 +70,7 @@ <#bad-string> <#bad-subject> <#bad-uri-escape> + <#bad-var> <#bad-verb> <#invalid-char-in-local> <#invalid-char-in-prefix> @@ -394,6 +395,11 @@ mf:name "bad-uri-escape" ; mf:action <bad-uri-escape.ttl> . +<#bad-var> + rdf:type rdft:TestTurtleNegativeSyntax ; + mf:name "bad-var" ; + mf:action <bad-var.ttl> . + <#bad-verb> rdf:type rdft:TestTurtleNegativeSyntax ; mf:name "bad-verb" ; diff --git a/test/meson.build b/test/meson.build index 5063277b..222ec1bc 100644 --- a/test/meson.build +++ b/test/meson.build @@ -219,6 +219,14 @@ if get_option('utils') suite: ['rdf', 'serd'], timeout: 240) + manifest = files('pattern' / 'manifest.ttl') + base_uri = serd_base + 'pattern' + '/' + test('pattern', run_test_suite, + args: script_args + [manifest, base_uri, '--', '-x'], + env: test_env, + suite: ['rdf', 'serd'], + timeout: 240) + ### The lax suite is special because it is run twice... lax_manifest = files('lax/manifest.ttl') lax_base_uri = serd_base + name + '/' diff --git a/test/pattern/bad-pattern-graph.nq b/test/pattern/bad-pattern-graph.nq new file mode 100644 index 00000000..9c77ac9a --- /dev/null +++ b/test/pattern/bad-pattern-graph.nq @@ -0,0 +1 @@ +<http://example.org/s> <http://example.org/p> <http://example.org/o> ?gµ . diff --git a/test/pattern/bad-pattern-object.nq b/test/pattern/bad-pattern-object.nq new file mode 100644 index 00000000..7f22b520 --- /dev/null +++ b/test/pattern/bad-pattern-object.nq @@ -0,0 +1 @@ +<http://example.org/s> <http://example.org/p> ?oµ <http://example.org/g> . diff --git a/test/pattern/bad-pattern-object.nt b/test/pattern/bad-pattern-object.nt new file mode 100644 index 00000000..aef057e4 --- /dev/null +++ b/test/pattern/bad-pattern-object.nt @@ -0,0 +1 @@ +<http://example.org/s> <http://example.org/p> ?oµ . diff --git a/test/pattern/bad-pattern-object.ttl b/test/pattern/bad-pattern-object.ttl new file mode 100644 index 00000000..cc2eee77 --- /dev/null +++ b/test/pattern/bad-pattern-object.ttl @@ -0,0 +1,2 @@ +<http://example.org/s> + <http://example.org/o> ?oµ . diff --git a/test/pattern/bad-pattern-predicate.nq b/test/pattern/bad-pattern-predicate.nq new file mode 100644 index 00000000..cfe466c9 --- /dev/null +++ b/test/pattern/bad-pattern-predicate.nq @@ -0,0 +1 @@ +<http://example.org/s> ?pµ <http://example.org/o> <http://example.org/g> . diff --git a/test/pattern/bad-pattern-predicate.nt b/test/pattern/bad-pattern-predicate.nt new file mode 100644 index 00000000..a7e0c994 --- /dev/null +++ b/test/pattern/bad-pattern-predicate.nt @@ -0,0 +1 @@ +<http://example.org/s> ?pµ <http://example.org/o> . diff --git a/test/pattern/bad-pattern-predicate.ttl b/test/pattern/bad-pattern-predicate.ttl new file mode 100644 index 00000000..cc2bd6b1 --- /dev/null +++ b/test/pattern/bad-pattern-predicate.ttl @@ -0,0 +1,2 @@ +<http://example.org/s> + ?pµ <http://example.org/o> . diff --git a/test/pattern/bad-pattern-subject.nq b/test/pattern/bad-pattern-subject.nq new file mode 100644 index 00000000..341c437f --- /dev/null +++ b/test/pattern/bad-pattern-subject.nq @@ -0,0 +1 @@ +?sµ <http://example.org/p> <http://example.org/o> <http://example.org/g> . diff --git a/test/pattern/bad-pattern-subject.nt b/test/pattern/bad-pattern-subject.nt new file mode 100644 index 00000000..5bbb29ff --- /dev/null +++ b/test/pattern/bad-pattern-subject.nt @@ -0,0 +1 @@ +?sµ <http://example.org/p> <http://example.org/o> . diff --git a/test/pattern/bad-pattern-subject.ttl b/test/pattern/bad-pattern-subject.ttl new file mode 100644 index 00000000..5f3dbfdd --- /dev/null +++ b/test/pattern/bad-pattern-subject.ttl @@ -0,0 +1,2 @@ +?sµ + <http://example.org/p> <http://example.org/o> . diff --git a/test/pattern/manifest.ttl b/test/pattern/manifest.ttl new file mode 100644 index 00000000..a7f238bc --- /dev/null +++ b/test/pattern/manifest.ttl @@ -0,0 +1,89 @@ +@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix rdft: <http://www.w3.org/ns/rdftest#> . + +<> + rdf:type mf:Manifest ; + rdfs:comment "Serd pattern syntax test cases" ; + mf:entries ( + <#bad-pattern-graph-nq> + <#bad-pattern-object-nq> + <#bad-pattern-object-nt> + <#bad-pattern-object-ttl> + <#bad-pattern-predicate-nq> + <#bad-pattern-predicate-nt> + <#bad-pattern-predicate-ttl> + <#bad-pattern-subject-nq> + <#bad-pattern-subject-nt> + <#bad-pattern-subject-ttl> + <#test-graph-pattern> + <#test-pattern-nt> + <#test-pattern-ttl> + ) . + +<#bad-pattern-graph-nq> + rdf:type rdft:TestNQuadsNegativeSyntax ; + mf:name "bad-pattern-graph-nq" ; + mf:action <bad-pattern-graph.nq> . + +<#bad-pattern-object-nq> + rdf:type rdft:TestNQuadsNegativeSyntax ; + mf:name "bad-pattern-object-nq" ; + mf:action <bad-pattern-object.nq> . + +<#bad-pattern-object-nt> + rdf:type rdft:TestNTriplesNegativeSyntax ; + mf:name "bad-pattern-object-nt" ; + mf:action <bad-pattern-object.nt> . + +<#bad-pattern-object-ttl> + rdf:type rdft:TestTurtleNegativeSyntax ; + mf:name "bad-pattern-object-ttl" ; + mf:action <bad-pattern-object.ttl> . + +<#bad-pattern-predicate-nq> + rdf:type rdft:TestNQuadsNegativeSyntax ; + mf:name "bad-pattern-predicate-nq" ; + mf:action <bad-pattern-predicate.nq> . + +<#bad-pattern-predicate-nt> + rdf:type rdft:TestNTriplesNegativeSyntax ; + mf:name "bad-pattern-predicate-nt" ; + mf:action <bad-pattern-predicate.nt> . + +<#bad-pattern-predicate-ttl> + rdf:type rdft:TestTurtleNegativeSyntax ; + mf:name "bad-pattern-predicate-ttl" ; + mf:action <bad-pattern-predicate.ttl> . + +<#bad-pattern-subject-nq> + rdf:type rdft:TestNQuadsNegativeSyntax ; + mf:name "bad-pattern-subject-nq" ; + mf:action <bad-pattern-subject.nq> . + +<#bad-pattern-subject-nt> + rdf:type rdft:TestNTriplesNegativeSyntax ; + mf:name "bad-pattern-subject-nt" ; + mf:action <bad-pattern-subject.nt> . + +<#bad-pattern-subject-ttl> + rdf:type rdft:TestTurtleNegativeSyntax ; + mf:name "bad-pattern-subject-ttl" ; + mf:action <bad-pattern-subject.ttl> . + +<#test-graph-pattern> + rdf:type rdft:TestNQuadsPositiveSyntax ; + mf:name "test-graph-pattern" ; + mf:action <test-graph-pattern.nq> . + +<#test-pattern-nt> + rdf:type rdft:TestNTriplesPositiveSyntax ; + mf:name "test-pattern-nt" ; + mf:action <test-pattern.nt> . + +<#test-pattern-ttl> + rdf:type rdft:TestTurtleEval ; + mf:name "test-pattern" ; + mf:action <test-pattern.ttl> ; + mf:result <test-pattern.nt> . diff --git a/test/pattern/test-graph-pattern.nq b/test/pattern/test-graph-pattern.nq new file mode 100644 index 00000000..453e9516 --- /dev/null +++ b/test/pattern/test-graph-pattern.nq @@ -0,0 +1 @@ +<http://a.example/s> <http://a.example/p> <http://a.example/o> ?g . diff --git a/test/pattern/test-pattern.nt b/test/pattern/test-pattern.nt new file mode 100644 index 00000000..ddfe6d3c --- /dev/null +++ b/test/pattern/test-pattern.nt @@ -0,0 +1,6 @@ +?s <http://example.org/p1> <http://example.org/o1> . +<http://example.org/s> ?p <http://example.org/o1> . +<http://example.org/s> <http://example.org/p1> ?o . +<http://example.org/s> <http://example.org/p2> _:b1 . +_:b1 ?2p <http://example.org/o2> . +_:b1 <http://example.org/p3> ?_o . diff --git a/test/pattern/test-pattern.ttl b/test/pattern/test-pattern.ttl new file mode 100644 index 00000000..3742e5ed --- /dev/null +++ b/test/pattern/test-pattern.ttl @@ -0,0 +1,10 @@ +?s + <http://example.org/p1> <http://example.org/o1> . + +<http://example.org/s> + ?p <http://example.org/o1> ; + <http://example.org/p1> ?o ; + <http://example.org/p2> [ + ?2p <http://example.org/o2> ; + <http://example.org/p3> ?_o + ] . diff --git a/test/test_overflow.c b/test/test_overflow.c index 6d5c6d0c..335cd5c7 100644 --- a/test/test_overflow.c +++ b/test/test_overflow.c @@ -25,15 +25,16 @@ static const size_t min_stack_size = 4 * sizeof(size_t) + 256u; static const size_t max_stack_size = 1024u; static SerdStatus -test_size(SerdWorld* const world, - const char* const str, - const SerdSyntax syntax, - const size_t stack_size) +test_size(SerdWorld* const world, + const char* const str, + const SerdSyntax syntax, + const SerdReaderFlags flags, + const size_t stack_size) { SerdSink* sink = serd_sink_new(NULL, NULL, NULL); SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL); SerdReader* const reader = - serd_reader_new(world, syntax, 0u, sink, stack_size); + serd_reader_new(world, syntax, flags, sink, stack_size); assert(reader); @@ -47,17 +48,18 @@ test_size(SerdWorld* const world, } static void -test_all_sizes(SerdWorld* const world, - const char* const str, - const SerdSyntax syntax) +test_all_sizes(SerdWorld* const world, + const char* const str, + const SerdSyntax syntax, + const SerdReaderFlags flags) { // Ensure reading with the maximum stack size succeeds - SerdStatus st = test_size(world, str, syntax, max_stack_size); + SerdStatus st = test_size(world, str, syntax, flags, max_stack_size); assert(!st); // Test with an increasingly smaller stack for (size_t size = max_stack_size; size > min_stack_size; --size) { - if ((st = test_size(world, str, syntax, size))) { + if ((st = test_size(world, str, syntax, flags, size))) { assert(st == SERD_ERR_OVERFLOW); } } @@ -76,7 +78,7 @@ test_ntriples_overflow(void) SerdWorld* const world = serd_world_new(); for (const char* const* t = test_strings; *t; ++t) { - test_all_sizes(world, *t, SERD_NTRIPLES); + test_all_sizes(world, *t, SERD_NTRIPLES, 0u); } serd_world_free(world); @@ -98,6 +100,7 @@ test_turtle_overflow(void) "<http://example.org/s> <http://example.org/p> _:blank .", "<http://example.org/s> <http://example.org/p> true .", "<http://example.org/s> <http://example.org/p> \"\"@en .", + "?subject ?predicate ?object .", "(((((((((42))))))))) <http://example.org/p> <http://example.org/o> .", "@prefix eg: <http://example.org/ns/test> .", "@base <http://example.org/base> .", @@ -165,7 +168,7 @@ test_turtle_overflow(void) SerdWorld* const world = serd_world_new(); for (const char* const* t = test_strings; *t; ++t) { - test_all_sizes(world, *t, SERD_TURTLE); + test_all_sizes(world, *t, SERD_TURTLE, SERD_READ_VARIABLES); } serd_world_free(world); |