diff options
author | David Robillard <d@drobilla.net> | 2017-01-05 21:40:27 -0500 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2017-01-06 01:48:28 -0500 |
commit | 5591d2367d540ffe202ebc71ffcc6ced47ea6962 (patch) | |
tree | bc61f09217105e88e5f44fc7e1968ee155ffe844 | |
parent | 0c71ed6ce02ee7ff65a5da2cf863c5712a67cb4b (diff) | |
download | serd-5591d2367d540ffe202ebc71ffcc6ced47ea6962.tar.gz serd-5591d2367d540ffe202ebc71ffcc6ced47ea6962.tar.bz2 serd-5591d2367d540ffe202ebc71ffcc6ced47ea6962.zip |
Add support for SPARQL PREFIX and BASE directives
-rw-r--r-- | NEWS | 1 |
-rw-r--r-- | src/reader.c | 164 |
-rw-r--r-- | tests/TurtleTests/manifest.ttl | 88 |
3 files changed, 147 insertions, 106 deletions
@@ -1,6 +1,7 @@ serd (0.25.0) unstable; * Add support for NQuads + * Add support for SPARQL PREFIX and BASE directives -- David Robillard <d@drobilla.net> Thu, 05 Jan 2017 16:20:09 -0500 diff --git a/src/reader.c b/src/reader.c index ddb8551b..3916d4c5 100644 --- a/src/reader.c +++ b/src/reader.c @@ -184,12 +184,14 @@ eat_byte_check(SerdReader* reader, const uint8_t byte) return eat_byte_safe(reader, byte); } -static inline void +static inline bool eat_string(SerdReader* reader, const char* str, unsigned n) { + bool bad = false; for (unsigned i = 0; i < n; ++i) { - eat_byte_check(reader, ((const uint8_t*)str)[i]); + bad |= eat_byte_check(reader, ((const uint8_t*)str)[i]); } + return bad; } static Ref @@ -971,14 +973,11 @@ read_iri(SerdReader* reader, Ref* dest, bool* ate_dot) switch (peek_byte(reader)) { case '<': *dest = read_IRIREF(reader); - break; + return true; default: *dest = push_node(reader, SERD_CURIE, "", 0); - if (!read_PrefixedName(reader, *dest, true, ate_dot)) { - *dest = pop_node(reader, *dest); - } + return read_PrefixedName(reader, *dest, true, ate_dot); } - return *dest != 0; } static bool @@ -1004,8 +1003,8 @@ read_literal(SerdReader* reader, Ref* dest, *dest = str; return true; except: - pop_node(reader, *datatype); - pop_node(reader, *lang); + *datatype = pop_node(reader, *datatype); + *lang = pop_node(reader, *lang); pop_node(reader, str); return false; } @@ -1255,7 +1254,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) } } - if (simple) { + if (simple && o) { deref(reader, o)->flags = flags; } @@ -1397,80 +1396,79 @@ read_collection(SerdReader* reader, ReadContext ctx, Ref* dest) } static Ref -read_subject(SerdReader* reader, ReadContext ctx, bool* nested) +read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, bool* nested) { - Ref subject = 0; bool ate_dot = false; switch (peek_byte(reader)) { case '[': *nested = true; - read_anon(reader, ctx, true, &subject); + read_anon(reader, ctx, true, dest); 
break; case '(': *nested = true; - read_collection(reader, ctx, &subject); + read_collection(reader, ctx, dest); break; case '_': - *nested = false; - subject = read_BLANK_NODE_LABEL(reader, &ate_dot); + *dest = read_BLANK_NODE_LABEL(reader, &ate_dot); break; default: - read_iri(reader, &subject, &ate_dot); + TRY_RET(read_iri(reader, dest, &ate_dot)); } - return ate_dot ? pop_node(reader, subject) : subject; + return ate_dot ? pop_node(reader, *dest) : *dest; } static bool read_triples(SerdReader* reader, ReadContext ctx, bool* ate_dot) { - bool nested = false; - const Ref subject = read_subject(reader, ctx, &nested); - bool ret = false; - if (subject) { - ctx.subject = subject; - if (nested) { - read_ws_star(reader); - ret = true; - if (peek_byte(reader) != '.') { - ret = read_predicateObjectList(reader, ctx, ate_dot); - } - } else { - TRY_RET(read_ws_plus(reader)); - ret = read_predicateObjectList(reader, ctx, ate_dot); + bool ret = false; + if (ctx.subject) { + TRY_RET(read_ws_plus(reader)); + if (peek_byte(reader) == '.') { + eat_byte_safe(reader, '.'); + *ate_dot = true; + return false; } - pop_node(reader, subject); - } else { - ret = false; + ret = read_predicateObjectList(reader, ctx, ate_dot); } ctx.subject = ctx.predicate = 0; return ret; } static bool -read_base(SerdReader* reader) +read_base(SerdReader* reader, bool sparql, bool token) { - // `@' is already eaten in read_directive - eat_string(reader, "base", 4); - TRY_RET(read_ws_plus(reader)); + if (token) { + TRY_RET(eat_string(reader, "base", 4)); + } + Ref uri; + TRY_RET(read_ws_plus(reader)); TRY_RET(uri = read_IRIREF(reader)); if (reader->base_sink) { reader->base_sink(reader->handle, deref(reader, uri)); } pop_node(reader, uri); + + read_ws_star(reader); + if (!sparql) { + return eat_byte_check(reader, '.'); + } else if (peek_byte(reader) == '.') { + return r_err(reader, SERD_ERR_BAD_SYNTAX, + "full stop after SPARQL BASE\n"); + } return true; } static bool -read_prefixID(SerdReader* reader) 
+read_prefixID(SerdReader* reader, bool sparql, bool token) { - bool ret = true; - // `@' is already eaten in read_directive - eat_string(reader, "prefix", 6); - TRY_RET(read_ws_plus(reader)); - - Ref name = push_node(reader, SERD_LITERAL, "", 0); + if (token) { + TRY_RET(eat_string(reader, "prefix", 6)); + } + TRY_RET(read_ws_plus(reader)); + bool ret = true; + Ref name = push_node(reader, SERD_LITERAL, "", 0); if (read_PN_PREFIX(reader, name) > SERD_FAILURE) { return pop_node(reader, name); } @@ -1493,27 +1491,63 @@ read_prefixID(SerdReader* reader) } pop_node(reader, uri); pop_node(reader, name); + if (!sparql) { + read_ws_star(reader); + return eat_byte_check(reader, '.'); + } return ret; } static bool read_directive(SerdReader* reader) { - eat_byte_safe(reader, '@'); + const bool sparql = peek_byte(reader) != '@'; + if (!sparql) { + eat_byte_safe(reader, '@'); + switch (peek_byte(reader)) { + case 'B': case 'P': + return r_err(reader, SERD_ERR_BAD_SYNTAX, + "uppercase directive\n"); + } + } + switch (peek_byte(reader)) { - case 'b': return read_base(reader); - case 'p': return read_prefixID(reader); - default: return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid directive\n"); + case 'B': case 'b': return read_base(reader, sparql, true); + case 'P': case 'p': return read_prefixID(reader, sparql, true); + default: + return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid directive\n"); } + + return true; +} + +static int +tokcmp(SerdReader* reader, Ref ref, const char* tok, size_t n) +{ + SerdNode* node = deref(reader, ref); + if (!node || node->n_bytes != n) { + return -1; + } + const char* s1 = (const char*)node->buf; + const char* s2 = tok; + for (; n > 0 && *s2; s1++, s2++, --n) { + if (toupper(*s1) != toupper(*s2)) { + return ((*(uint8_t*)s1 < *(uint8_t*)s2) ? 
-1 : +1); + } + } + return 0; } static bool read_statement(SerdReader* reader) { - SerdStatementFlags flags = 0; - ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags }; + SerdStatementFlags flags = 0; + ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags }; + Ref subj = 0; + bool ate_dot = false; + bool nested = false; + bool ret = true; read_ws_star(reader); - bool ate_dot = false; switch (peek_byte(reader)) { case '\0': reader->eof = true; @@ -1521,20 +1555,26 @@ read_statement(SerdReader* reader) case '@': TRY_RET(read_directive(reader)); read_ws_star(reader); - return (eat_byte_check(reader, '.') == '.'); + break; default: - if (!read_triples(reader, ctx, &ate_dot)) { - return false; - } else if (ate_dot) { - return true; - } else { + subj = read_subject(reader, ctx, &ctx.subject, &nested); + if (!tokcmp(reader, ctx.subject, "base", 4)) { + ret = read_base(reader, true, false); + } else if (!tokcmp(reader, ctx.subject, "prefix", 6)) { + ret = read_prefixID(reader, true, false); + } else if (!subj) { + ret = r_err(reader, SERD_ERR_BAD_SYNTAX, "bad subject\n"); + } else if (!read_triples(reader, ctx, &ate_dot) && !nested) { + ret = nested; + } else if (!ate_dot) { read_ws_star(reader); - return (eat_byte_check(reader, '.') == '.'); + ret = (eat_byte_check(reader, '.') == '.'); } + pop_node(reader, subj); break; } read_ws_star(reader); // remove? 
- return true; + return ret; } static bool @@ -1561,7 +1601,7 @@ read_nquadsDoc(SerdReader* reader) } // subject - if (!(ctx.subject = read_subject(reader, ctx, &nested))) { + if (!(ctx.subject = read_subject(reader, ctx, &ctx.subject, &nested))) { return false; } diff --git a/tests/TurtleTests/manifest.ttl b/tests/TurtleTests/manifest.ttl index 5bbaeb6c..0a24655e 100644 --- a/tests/TurtleTests/manifest.ttl +++ b/tests/TurtleTests/manifest.ttl @@ -27,7 +27,7 @@ <#IRI_with_all_punctuation> <#bareword_a_predicate> <#old_style_prefix> - # <#SPARQL_style_prefix> + <#SPARQL_style_prefix> <#prefixed_IRI_predicate> <#prefixed_IRI_object> <#prefix_only_IRI> @@ -47,7 +47,7 @@ <#localName_with_leading_digit> <#localName_with_non_leading_extras> <#old_style_base> - # <#SPARQL_style_base> + <#SPARQL_style_base> <#labeled_blank_node_subject> <#labeled_blank_node_object> <#labeled_blank_node_with_PN_CHARS_BASE_character_boundaries> @@ -141,12 +141,12 @@ <#turtle-syntax-uri-03> <#turtle-syntax-uri-04> <#turtle-syntax-base-01> - # <#turtle-syntax-base-02> + <#turtle-syntax-base-02> <#turtle-syntax-base-03> - # <#turtle-syntax-base-04> + <#turtle-syntax-base-04> <#turtle-syntax-prefix-01> - # <#turtle-syntax-prefix-02> - # <#turtle-syntax-prefix-03> + <#turtle-syntax-prefix-02> + <#turtle-syntax-prefix-03> <#turtle-syntax-prefix-04> <#turtle-syntax-prefix-05> <#turtle-syntax-prefix-06> @@ -374,13 +374,13 @@ mf:result <IRI_spo.nt> ; . -# <#SPARQL_style_prefix> rdf:type rdft:TestTurtleEval ; -# mf:name "SPARQL_style_prefix" ; -# rdfs:comment "SPARQL-style prefix" ; -# rdft:approval rdft:Approved ; -# mf:action <SPARQL_style_prefix.ttl> ; -# mf:result <IRI_spo.nt> ; -# . +<#SPARQL_style_prefix> rdf:type rdft:TestTurtleEval ; + mf:name "SPARQL_style_prefix" ; + rdfs:comment "SPARQL-style prefix" ; + rdft:approval rdft:Approved ; + mf:action <SPARQL_style_prefix.ttl> ; + mf:result <IRI_spo.nt> ; + . 
<#prefixed_IRI_predicate> rdf:type rdft:TestTurtleEval ; mf:name "prefixed_IRI_predicate" ; @@ -534,13 +534,13 @@ mf:result <IRI_spo.nt> ; . -# <#SPARQL_style_base> rdf:type rdft:TestTurtleEval ; -# mf:name "SPARQL_style_base" ; -# rdfs:comment "SPARQL-style base" ; -# rdft:approval rdft:Approved ; -# mf:action <SPARQL_style_base.ttl> ; -# mf:result <IRI_spo.nt> ; -# . +<#SPARQL_style_base> rdf:type rdft:TestTurtleEval ; + mf:name "SPARQL_style_base" ; + rdfs:comment "SPARQL-style base" ; + rdft:approval rdft:Approved ; + mf:action <SPARQL_style_base.ttl> ; + mf:result <IRI_spo.nt> ; + . <#labeled_blank_node_subject> rdf:type rdft:TestTurtleEval ; mf:name "labeled_blank_node_subject" ; @@ -1151,12 +1151,12 @@ mf:action <turtle-syntax-base-01.ttl> ; . -# <#turtle-syntax-base-02> rdf:type rdft:TestTurtlePositiveSyntax ; -# mf:name "turtle-syntax-base-02" ; -# rdfs:comment "BASE" ; -# rdft:approval rdft:Approved ; -# mf:action <turtle-syntax-base-02.ttl> ; -# . +<#turtle-syntax-base-02> rdf:type rdft:TestTurtlePositiveSyntax ; + mf:name "turtle-syntax-base-02" ; + rdfs:comment "BASE" ; + rdft:approval rdft:Approved ; + mf:action <turtle-syntax-base-02.ttl> ; + . <#turtle-syntax-base-03> rdf:type rdft:TestTurtlePositiveSyntax ; mf:name "turtle-syntax-base-03" ; @@ -1165,12 +1165,12 @@ mf:action <turtle-syntax-base-03.ttl> ; . -# <#turtle-syntax-base-04> rdf:type rdft:TestTurtlePositiveSyntax ; -# mf:name "turtle-syntax-base-04" ; -# rdfs:comment "base with relative IRIs" ; -# rdft:approval rdft:Approved ; -# mf:action <turtle-syntax-base-04.ttl> ; -# . +<#turtle-syntax-base-04> rdf:type rdft:TestTurtlePositiveSyntax ; + mf:name "turtle-syntax-base-04" ; + rdfs:comment "base with relative IRIs" ; + rdft:approval rdft:Approved ; + mf:action <turtle-syntax-base-04.ttl> ; + . <#turtle-syntax-prefix-01> rdf:type rdft:TestTurtlePositiveSyntax ; mf:name "turtle-syntax-prefix-01" ; @@ -1179,19 +1179,19 @@ mf:action <turtle-syntax-prefix-01.ttl> ; . 
-# <#turtle-syntax-prefix-02> rdf:type rdft:TestTurtlePositiveSyntax ; -# mf:name "turtle-syntax-prefix-02" ; -# rdfs:comment "PreFIX" ; -# rdft:approval rdft:Approved ; -# mf:action <turtle-syntax-prefix-02.ttl> ; -# . +<#turtle-syntax-prefix-02> rdf:type rdft:TestTurtlePositiveSyntax ; + mf:name "turtle-syntax-prefix-02" ; + rdfs:comment "PreFIX" ; + rdft:approval rdft:Approved ; + mf:action <turtle-syntax-prefix-02.ttl> ; + . -# <#turtle-syntax-prefix-03> rdf:type rdft:TestTurtlePositiveSyntax ; -# mf:name "turtle-syntax-prefix-03" ; -# rdfs:comment "Empty PREFIX" ; -# rdft:approval rdft:Approved ; -# mf:action <turtle-syntax-prefix-03.ttl> ; -# . +<#turtle-syntax-prefix-03> rdf:type rdft:TestTurtlePositiveSyntax ; + mf:name "turtle-syntax-prefix-03" ; + rdfs:comment "Empty PREFIX" ; + rdft:approval rdft:Approved ; + mf:action <turtle-syntax-prefix-03.ttl> ; + . <#turtle-syntax-prefix-04> rdf:type rdft:TestTurtlePositiveSyntax ; mf:name "turtle-syntax-prefix-04" ; |