aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2019-12-18 19:09:49 -0500
committer David Robillard <d@drobilla.net> 2019-12-20 10:26:55 -0500
commit 1c3a97e8a0ca77f872783cc5471aa95230366e16 (patch)
tree 6141fbd6fa4a36e52943f198b87cb02f8f3dcd51
parent 4a9ff404a147bdb24aac14624ae12a85103d75a2 (diff)
download serd-1c3a97e8a0ca77f872783cc5471aa95230366e16.tar.gz
serd-1c3a97e8a0ca77f872783cc5471aa95230366e16.tar.bz2
serd-1c3a97e8a0ca77f872783cc5471aa95230366e16.zip
Add support for parsing variables
This adds a reader flag and serdi option for extending a syntax with support for SPARQL-like variables, for storing things like patterns or simple queries.
-rw-r--r-- NEWS 3
-rw-r--r-- doc/serdi.1 5
-rw-r--r-- serd/serd.h 14
-rw-r--r-- src/n3.c 79
-rw-r--r-- src/reader.c 1
-rw-r--r-- src/reader.h 1
-rw-r--r-- src/serdi.c 3
-rw-r--r-- src/writer.c 17
-rw-r--r-- tests/bad/bad-var.ttl 2
-rw-r--r-- tests/bad/manifest.ttl 6
-rw-r--r-- tests/overflow_test.c 5
-rw-r--r-- tests/pattern/bad-pattern.nq 1
-rw-r--r-- tests/pattern/bad-pattern.ttl 2
-rw-r--r-- tests/pattern/manifest.ttl 35
-rw-r--r-- tests/pattern/test-pattern.nt 6
-rw-r--r-- tests/pattern/test-pattern.ttl 10
-rw-r--r-- wscript 3
17 files changed, 179 insertions, 14 deletions
diff --git a/NEWS b/NEWS
index 53772fe2..7f0f58ed 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,7 @@ serd (1.0.1) unstable;
* Add model for storing statements in memory
* Add option for writing terse output without newlines
* Add support for basic literal normalisation
+ * Add support for parsing variables
* Add support for validation
* Add support for writing terse collections
* Add support for xsd:float and xsd:double literals
@@ -22,7 +23,7 @@ serd (1.0.1) unstable;
* Use a fixed-size reader stack
* Use char* for strings in public API
- -- David Robillard <d@drobilla.net> Sat, 19 Jan 2019 12:31:12 +0000
+ -- David Robillard <d@drobilla.net> Thu, 19 Dec 2019 14:47:28 -0500
serd (0.30.3) unstable;
diff --git a/doc/serdi.1 b/doc/serdi.1
index db1a81b0..0ecacd2d 100644
--- a/doc/serdi.1
+++ b/doc/serdi.1
@@ -104,6 +104,11 @@ Write terser output without newlines.
.BR \-v
Display version information and exit.
+.TP
+.BR \-x
+Support parsing variable nodes.
+Variables can be written in SPARQL style, for example \*(lq?var\*(rq or \*(lq$var\*(rq.
+
.SH AUTHOR
Serdi was written by David Robillard <d@drobilla.net>
diff --git a/serd/serd.h b/serd/serd.h
index 258effbb..e96d46f7 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -188,7 +188,16 @@ typedef enum {
is meaningful only within this serialisation. @see [RDF 1.1
Turtle](http://www.w3.org/TR/turtle/#grammar-production-BLANK_NODE_LABEL)
*/
- SERD_BLANK = 4
+ SERD_BLANK = 4,
+
+ /**
+ A variable node
+
+ Value is a variable name without any syntactic prefix, like "name",
+ which is meaningful only within this serialisation. @see [SPARQL 1.1
+ Query Language](https://www.w3.org/TR/sparql11-query/#rVar)
+ */
+ SERD_VARIABLE = 5
} SerdNodeType;
/// Flags indicating certain string properties relevant to serialisation
@@ -258,7 +267,8 @@ typedef struct {
/// Reader options
typedef enum {
- SERD_READ_LAX = 1 << 0 ///< Tolerate invalid input where possible
+ SERD_READ_LAX = 1 << 0, ///< Tolerate invalid input where possible
+ SERD_READ_VARIABLES = 1 << 1 ///< Support variable nodes
} SerdReaderFlag;
/// Bitwise OR of SerdReaderFlag values
diff --git a/src/n3.c b/src/n3.c
index 72e77a1e..45933efe 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -837,10 +837,50 @@ read_literal(SerdReader* reader, SerdNode** dest, bool* ate_dot)
}
static SerdStatus
+/**
+   Read a variable name into the node at `*dest`.
+
+   Simplified from SPARQL: VARNAME ::= (PN_CHARS_U | [0-9])+
+
+   The leading '?' or '$' must already have been consumed by the caller.
+   Digits and '_' are pushed directly, anything else is delegated to
+   read_PN_CHARS().
+
+   @param reader Reader to consume input from.
+   @param dest Points to the node that receives the name characters.
+   @return SERD_SUCCESS if at least one name character was read, the result
+   of r_err() with SERD_ERR_BAD_SYNTAX if the name is empty, or the status
+   from read_PN_CHARS() if it is worse than SERD_FAILURE.
+*/
+read_VARNAME(SerdReader* reader, SerdNode** dest)
+{
+	SerdNode*  n     = *dest;
+	SerdStatus st    = SERD_SUCCESS;
+	bool       empty = true; // No name character has been consumed yet
+	int        c     = 0;
+	while ((c = peek_byte(reader))) {
+		if (is_digit(c) || c == '_') {
+			// NOTE(review): the result of push_byte() is not checked here,
+			// confirm whether it can fail and needs to be propagated
+			push_byte(reader, n, eat_byte_safe(reader, c));
+		} else if ((st = read_PN_CHARS(reader, n))) {
+			// A status no worse than SERD_FAILURE is treated as the normal
+			// end of the name, anything worse is passed to the caller
+			st = st > SERD_FAILURE ? st : SERD_SUCCESS;
+			break;
+		}
+
+		empty = false;
+	}
+
+	// The grammar requires one or more characters, so reject a bare sigil
+	if (!st && empty) {
+		return r_err(reader, SERD_ERR_BAD_SYNTAX,
+		             "expected variable name\n");
+	}
+
+	return st;
+}
+
+/**
+   Read a variable such as "?name" or "$name" into a new SERD_VARIABLE node.
+
+   @param reader Reader to consume input from, positioned at the '?' or '$'.
+   @param dest Set to the newly pushed variable node.
+   @return The result of r_err() with SERD_ERR_BAD_SYNTAX if the reader was
+   not created with SERD_READ_VARIABLES, SERD_ERR_OVERFLOW if the node could
+   not be pushed, otherwise the status of read_VARNAME().
+*/
+static SerdStatus
+read_Var(SerdReader* reader, SerdNode** dest)
+{
+	// Variables are an opt-in extension enabled by a reader flag
+	if ((reader->flags & SERD_READ_VARIABLES) == 0) {
+		return r_err(reader, SERD_ERR_BAD_SYNTAX,
+		             "syntax does not support variables\n");
+	}
+
+	// Start with an empty node, the name is appended by read_VARNAME()
+	*dest = push_node(reader, SERD_VARIABLE, "", 0);
+	if (!*dest) {
+		return SERD_ERR_OVERFLOW;
+	}
+
+	// Skip the sigil so that it is not stored as part of the name
+	assert(peek_byte(reader) == '$' || peek_byte(reader) == '?');
+	eat_byte(reader);
+
+	return read_VARNAME(reader, dest);
+}
+
+static SerdStatus
read_verb(SerdReader* reader, SerdNode** dest)
{
const size_t orig_stack_size = reader->stack.size;
- if (peek_byte(reader) == '<') {
+
+ switch (peek_byte(reader)) {
+ case '$': case '?':
+ return read_Var(reader, dest);
+ case '<':
return read_IRIREF(reader, dest);
}
@@ -999,15 +1039,25 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
const int c = peek_byte(reader);
if (!fancy_syntax(reader)) {
switch (c) {
- case '"': case ':': case '<': case '_': break;
- default: return r_err(reader, SERD_ERR_BAD_SYNTAX,
- "expected: ':', '<', or '_'\n");
+ case '"': case ':': case '<': case '_':
+ break;
+ case '$': case '?':
+ if (reader->flags & SERD_READ_VARIABLES) {
+ break;
+ }
+ // fallthrough
+ default:
+ return r_err(reader, SERD_ERR_BAD_SYNTAX,
+ "expected: ':', '<', or '_'\n");
}
}
switch (c) {
case EOF: case '\0': case ')':
return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected object\n");
+ case '$': case '?':
+ ret = read_Var(reader, &o);
+ break;
case '[':
simple = false;
ret = read_anon(reader, *ctx, false, &o);
@@ -1203,6 +1253,9 @@ read_subject(SerdReader* reader, ReadContext ctx, SerdNode** dest, int* s_type)
SerdStatus st = SERD_SUCCESS;
bool ate_dot = false;
switch ((*s_type = peek_byte(reader))) {
+ case '$': case '?':
+ st = read_Var(reader, dest);
+ break;
case '[':
read_anon(reader, ctx, true, dest);
break;
@@ -1519,11 +1572,21 @@ read_nquadsDoc(SerdReader* reader)
return SERD_ERR_BAD_SYNTAX;
}
- // subject predicate object
if ((st = read_subject(reader, ctx, &ctx.subject, &s_type)) ||
- !read_ws_star(reader) ||
- (st = read_IRIREF(reader, &ctx.predicate)) ||
- !read_ws_star(reader) ||
+ !read_ws_star(reader)) {
+ return st;
+ }
+
+ switch (peek_byte(reader)) {
+ case '$': case '?':
+ st = read_Var(reader, &ctx.predicate);
+ break;
+ case '<':
+ st = read_IRIREF(reader, &ctx.predicate);
+ break;
+ }
+
+ if (st || !read_ws_star(reader) ||
(st = read_object(reader, &ctx, false, &ate_dot))) {
return st;
}
diff --git a/src/reader.c b/src/reader.c
index c407e6ba..d0a9c4e9 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -174,6 +174,7 @@ serd_reader_new(SerdWorld* world,
me->sink = sink;
me->stack = serd_stack_new(stack_size);
me->syntax = syntax;
+ me->flags = flags;
me->next_id = 1;
me->strict = !(flags & SERD_READ_LAX);
diff --git a/src/reader.h b/src/reader.h
index 01462af8..58b5ea26 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -60,6 +60,7 @@ struct SerdReaderImpl {
SerdByteSource source;
SerdStack stack;
SerdSyntax syntax;
+ SerdReaderFlags flags;
unsigned next_id;
uint8_t* buf;
char* bprefix;
diff --git a/src/serdi.c b/src/serdi.c
index eed54622..14e88354 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -73,6 +73,7 @@ print_usage(const char* name, bool error)
fprintf(os, " -s INPUT Parse INPUT as string.\n");
fprintf(os, " -t Write terser output without newlines.\n");
fprintf(os, " -v Display version information and exit.\n");
+ fprintf(os, " -x Support parsing variable nodes like `?x'.\n");
return error ? 1 : 0;
}
@@ -187,6 +188,8 @@ main(int argc, char** argv)
quiet = true;
} else if (argv[a][1] == 'v') {
return print_version();
+ } else if (argv[a][1] == 'x') {
+ reader_flags |= SERD_READ_VARIABLES;
} else if (argv[a][1] == 's') {
if (++a == argc) {
return missing_arg(argv[0], 's');
diff --git a/src/writer.c b/src/writer.c
index a1dd8d97..b1640af7 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -726,6 +726,20 @@ write_blank(SerdWriter* const writer,
}
SERD_WARN_UNUSED_RESULT static SerdStatus
+/**
+   Write a variable node as "?" followed by its name.
+
+   @param writer Writer to emit to.
+   @param node Variable node whose string is written after the '?'.
+   @param field Unused, kept so the signature matches the call in write_node().
+   @param flags Unused, kept so the signature matches the call in write_node().
+   @return SERD_SUCCESS, or the status of the first write that failed.
+*/
+write_variable(SerdWriter* const        writer,
+               const SerdNode*          node,
+               const SerdField          field,
+               const SerdStatementFlags flags)
+{
+	(void)field;
+	(void)flags;
+
+	SerdStatus st = SERD_SUCCESS;
+
+	// Check the sigil write as well, so that its failure is not overwritten
+	// by the status of the name write that follows
+	TRY(st, esink("?", 1, writer));
+	TRY(st, esink(serd_node_get_string(node), node->n_bytes, writer));
+
+	writer->last_sep = SEP_NONE;
+	return st;
+}
+
+SERD_WARN_UNUSED_RESULT static SerdStatus
write_node(SerdWriter* writer,
const SerdNode* node,
const SerdField field,
@@ -745,6 +759,9 @@ write_node(SerdWriter* writer,
case SERD_BLANK:
st = write_blank(writer, node, field, flags);
break;
+ case SERD_VARIABLE:
+ st = write_variable(writer, node, field, flags);
+ break;
}
return st;
}
diff --git a/tests/bad/bad-var.ttl b/tests/bad/bad-var.ttl
new file mode 100644
index 00000000..29b5b008
--- /dev/null
+++ b/tests/bad/bad-var.ttl
@@ -0,0 +1,2 @@
+?s
+ <http://example.org/p> <http://example.org/o> .
diff --git a/tests/bad/manifest.ttl b/tests/bad/manifest.ttl
index d294f120..eb5ac65b 100644
--- a/tests/bad/manifest.ttl
+++ b/tests/bad/manifest.ttl
@@ -67,6 +67,7 @@
<#bad-string>
<#bad-subject>
<#bad-uri-escape>
+ <#bad-var>
<#bad-verb>
<#invalid-char-in-local>
<#invalid-char-in-prefix>
@@ -376,6 +377,11 @@
mf:name "bad-uri-escape" ;
mf:action <bad-uri-escape.ttl> .
+<#bad-var>
+ rdf:type rdft:TestTurtleNegativeSyntax ;
+ mf:name "bad-var" ;
+ mf:action <bad-var.ttl> .
+
<#bad-verb>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "bad-verb" ;
diff --git a/tests/overflow_test.c b/tests/overflow_test.c
index 98582597..ff5c5350 100644
--- a/tests/overflow_test.c
+++ b/tests/overflow_test.c
@@ -24,8 +24,8 @@
static SerdStatus
test(SerdWorld* world, SerdSink* sink, const char* str, size_t stack_size)
{
- SerdReader* reader =
- serd_reader_new(world, SERD_TURTLE, 0, sink, stack_size);
+ SerdReader* reader = serd_reader_new(
+ world, SERD_TURTLE, SERD_READ_VARIABLES, sink, stack_size);
serd_reader_start_string(reader, str, NULL);
const SerdStatus st = serd_reader_read_document(reader);
@@ -52,6 +52,7 @@ main(void)
{":s :p \"literal\"", 336},
{":s :p \"verb\"", 275},
{":s :p _:blank .", 307},
+ {":s :p ?o .", 307},
{":s :p true .", 307},
{":s :p true .", 341},
{":s :p \"\"@en .", 339},
diff --git a/tests/pattern/bad-pattern.nq b/tests/pattern/bad-pattern.nq
new file mode 100644
index 00000000..a7e0c994
--- /dev/null
+++ b/tests/pattern/bad-pattern.nq
@@ -0,0 +1 @@
+<http://example.org/s> ?pµ <http://example.org/o> .
diff --git a/tests/pattern/bad-pattern.ttl b/tests/pattern/bad-pattern.ttl
new file mode 100644
index 00000000..5f3dbfdd
--- /dev/null
+++ b/tests/pattern/bad-pattern.ttl
@@ -0,0 +1,2 @@
+?sµ
+ <http://example.org/p> <http://example.org/o> .
diff --git a/tests/pattern/manifest.ttl b/tests/pattern/manifest.ttl
new file mode 100644
index 00000000..a179a64d
--- /dev/null
+++ b/tests/pattern/manifest.ttl
@@ -0,0 +1,35 @@
+@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix rdft: <http://www.w3.org/ns/rdftest#> .
+
+<>
+ rdf:type mf:Manifest ;
+ rdfs:comment "Serd pattern syntax test cases" ;
+ mf:entries (
+ <#bad-pattern-nq>
+ <#bad-pattern-ttl>
+ <#test-pattern-nt>
+ <#test-pattern-ttl>
+ ) .
+
+<#bad-pattern-nq>
+ rdf:type rdft:TestNQuadsNegativeSyntax ;
+ mf:name "bad-pattern-nq" ;
+ mf:action <bad-pattern.nq> .
+
+<#bad-pattern-ttl>
+ rdf:type rdft:TestTurtleNegativeSyntax ;
+ mf:name "bad-pattern" ;
+ mf:action <bad-pattern.ttl> .
+
+<#test-pattern-nt>
+ rdf:type rdft:TestNTriplesPositiveSyntax ;
+ mf:name "test-pattern-nt" ;
+ mf:action <test-pattern.nt> .
+
+<#test-pattern-ttl>
+ rdf:type rdft:TestTurtleEval ;
+ mf:name "test-pattern" ;
+ mf:action <test-pattern.ttl> ;
+ mf:result <test-pattern.nt> .
diff --git a/tests/pattern/test-pattern.nt b/tests/pattern/test-pattern.nt
new file mode 100644
index 00000000..ddfe6d3c
--- /dev/null
+++ b/tests/pattern/test-pattern.nt
@@ -0,0 +1,6 @@
+?s <http://example.org/p1> <http://example.org/o1> .
+<http://example.org/s> ?p <http://example.org/o1> .
+<http://example.org/s> <http://example.org/p1> ?o .
+<http://example.org/s> <http://example.org/p2> _:b1 .
+_:b1 ?2p <http://example.org/o2> .
+_:b1 <http://example.org/p3> ?_o .
diff --git a/tests/pattern/test-pattern.ttl b/tests/pattern/test-pattern.ttl
new file mode 100644
index 00000000..3742e5ed
--- /dev/null
+++ b/tests/pattern/test-pattern.ttl
@@ -0,0 +1,10 @@
+?s
+ <http://example.org/p1> <http://example.org/o1> .
+
+<http://example.org/s>
+ ?p <http://example.org/o1> ;
+ <http://example.org/p1> ?o ;
+ <http://example.org/p2> [
+ ?2p <http://example.org/o2> ;
+ <http://example.org/p3> ?_o
+ ] .
diff --git a/wscript b/wscript
index 5f2dd5bf..ba03695c 100644
--- a/wscript
+++ b/wscript
@@ -572,7 +572,7 @@ def test(tst):
import tempfile
# Create test output directories
- for i in ['bad', 'good', 'lax', 'normalise', 'terse', 'multifile',
+ for i in ['bad', 'good', 'lax', 'normalise', 'pattern', 'terse', 'multifile',
'TurtleTests', 'NTriplesTests', 'NQuadsTests', 'TriGTests']:
try:
test_dir = os.path.join('tests', i)
@@ -697,6 +697,7 @@ def test(tst):
test_suite(tst, serd_base + 'bad/', 'bad', None, 'Turtle')
test_suite(tst, serd_base + 'lax/', 'lax', None, 'Turtle', ['-l'])
test_suite(tst, serd_base + 'lax/', 'lax', None, 'Turtle')
+ test_suite(tst, serd_base + 'pattern/', 'pattern', None, 'Turtle', ['-x'])
test_suite(tst, serd_base + 'normalise/', 'normalise', None, 'Turtle',
['-n'])
test_suite(tst, serd_base + 'terse/', 'terse', None, 'Turtle', ['-t'],