diff options
author | David Robillard <d@drobilla.net> | 2017-01-06 14:48:03 -0500 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2017-01-06 21:39:01 -0500 |
commit | 5475750ecf496c774a082464b3e60f5adce9cc8a (patch) | |
tree | 57e844edc09637096d1b5ea16dcc5ebb69f88345 /src | |
parent | 52590dbeb23100320417d6f72e20fadf215479e5 (diff) | |
download | serd-5475750ecf496c774a082464b3e60f5adce9cc8a.tar.gz serd-5475750ecf496c774a082464b3e60f5adce9cc8a.tar.bz2 serd-5475750ecf496c774a082464b3e60f5adce9cc8a.zip |
Add support for reading TriG
Diffstat (limited to 'src')
-rw-r--r-- | src/reader.c | 137 |
-rw-r--r-- | src/serdi.c | 13 |
-rw-r--r-- | src/writer.c | 21 |
3 files changed, 119 insertions, 52 deletions
diff --git a/src/reader.c b/src/reader.c
index edeb8956..af5fb85c 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -101,13 +101,13 @@ struct SerdReaderImpl {
 static inline bool
 supports_fancy_literals(const SerdReader* reader)
 {
-	return reader->syntax == SERD_TURTLE;
+	return reader->syntax == SERD_TURTLE || reader->syntax == SERD_TRIG;
 }
 
 static inline bool
 supports_relative_iris(const SerdReader* reader)
 {
-	return reader->syntax == SERD_TURTLE;
+	return reader->syntax == SERD_TURTLE || reader->syntax == SERD_TRIG;
 }
 
 static int
@@ -156,7 +156,7 @@ eat_byte_safe(SerdReader* reader, const uint8_t byte)
 {
 	assert(peek_byte(reader) == byte);
 	switch (byte) {
-	case '\0': reader->eof = true; break;
+	case '\0': reader->eof = (byte != '\0'); break;
 	case '\n': ++reader->cur.line; reader->cur.col = 0; break;
 	default: ++reader->cur.col;
 	}
@@ -520,13 +520,6 @@ read_ws_star(SerdReader* reader)
 }
 
 static inline bool
-read_ws_plus(SerdReader* reader)
-{
-	TRY_RET(read_ws(reader));
-	return read_ws_star(reader);
-}
-
-static inline bool
 peek_delim(SerdReader* reader, const char delim)
 {
 	read_ws_star(reader);
@@ -876,17 +869,13 @@ read_IRIREF(SerdReader* reader)
 static bool
 read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix, bool* ate_dot)
 {
-	if (read_prefix) {
-		if (read_PN_PREFIX(reader, dest) > SERD_FAILURE) {
-			return false;
-		}
-	}
-
-	if (eat_byte_check(reader, ':') != ':') {
+	if (read_prefix && read_PN_PREFIX(reader, dest) > SERD_FAILURE) {
+		return false;
+	} else if (peek_byte(reader) != ':') {
 		return false;
 	}
 
-	push_byte(reader, dest, ':');
+	push_byte(reader, dest, eat_byte_safe(reader, ':'));
 	return read_PN_LOCAL(reader, dest, ate_dot) <= SERD_FAILURE;
 }
 
@@ -1307,7 +1296,7 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot)
 			switch (c = peek_byte(reader)) {
 			case 0:
 				return false;
-			case '.': case ']':
+			case '.': case ']': case '}':
 				return true;
 			case ';':
 				eat_byte_safe(reader, c);
@@ -1396,16 +1385,14 @@ read_collection(SerdReader* reader, ReadContext ctx, Ref* dest)
 }
 
 static Ref
-read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, bool* nested)
+read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, char* s_type)
 {
 	bool ate_dot = false;
-	switch (peek_byte(reader)) {
+	switch ((*s_type = peek_byte(reader))) {
 	case '[':
-		*nested = true;
 		read_anon(reader, ctx, true, dest);
 		break;
 	case '(':
-		*nested = true;
 		read_collection(reader, ctx, dest);
 		break;
 	case '_':
@@ -1417,15 +1404,36 @@ read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, bool* nested)
 	return ate_dot ? pop_node(reader, *dest) : *dest;
 }
 
+static Ref
+read_labelOrSubject(SerdReader* reader, ReadContext ctx)
+{
+	Ref subject = 0;
+	bool ate_dot = false;
+	switch (peek_byte(reader)) {
+	case '[':
+		eat_byte_safe(reader, '[');
+		read_ws_star(reader);
+		TRY_RET(eat_byte_check(reader, ']'));
+		return blank_id(reader);
+	case '_':
+		return read_BLANK_NODE_LABEL(reader, &ate_dot);
+	default:
+		read_iri(reader, &subject, &ate_dot);
+	}
+	return subject;
+}
+
 static bool
 read_triples(SerdReader* reader, ReadContext ctx, bool* ate_dot)
 {
 	bool ret = false;
 	if (ctx.subject) {
-		TRY_RET(read_ws_plus(reader));
-		if (peek_byte(reader) == '.') {
-			eat_byte_safe(reader, '.');
-			*ate_dot = true;
+		read_ws_star(reader);
+		switch (peek_byte(reader)) {
+		case '.':
+			*ate_dot = eat_byte_safe(reader, '.');
+			return false;
+		case '}':
 			return false;
 		}
 		ret = read_predicateObjectList(reader, ctx, ate_dot);
@@ -1442,7 +1450,7 @@ read_base(SerdReader* reader, bool sparql, bool token)
 	}
 
 	Ref uri;
-	TRY_RET(read_ws_plus(reader));
+	read_ws_star(reader);
 	TRY_RET(uri = read_IRIREF(reader));
 	if (reader->base_sink) {
 		reader->base_sink(reader->handle, deref(reader, uri));
@@ -1466,7 +1474,7 @@ read_prefixID(SerdReader* reader, bool sparql, bool token)
 		TRY_RET(eat_string(reader, "prefix", 6));
 	}
 
-	TRY_RET(read_ws_plus(reader));
+	read_ws_star(reader);
 	bool ret = true;
 	Ref name = push_node(reader, SERD_LITERAL, "", 0);
 	if (read_PN_PREFIX(reader, name) > SERD_FAILURE) {
@@ -1521,6 +1529,30 @@ read_directive(SerdReader* reader)
 	return true;
 }
 
+static bool
+read_wrappedGraph(SerdReader* reader, ReadContext* ctx)
+{
+	bool ate_dot = false;
+	char s_type = 0;
+	TRY_RET(eat_byte_check(reader, '{'));
+	read_ws_star(reader);
+	while (peek_byte(reader) != '}') {
+		ctx->subject = 0;
+		Ref subj = read_subject(reader, *ctx, &ctx->subject, &s_type);
+		if (!subj ||
+		    (!read_triples(reader, *ctx, &ate_dot) && s_type != '[')) {
+			return false;
+		}
+		pop_node(reader, subj);
+		read_ws_star(reader);
+		if (peek_byte(reader) == '.') {
+			eat_byte_safe(reader, '.');
+		}
+		read_ws_star(reader);
+	}
+	return eat_byte_check(reader, '}');
+}
+
 static int
 tokcmp(SerdReader* reader, Ref ref, const char* tok, size_t n)
 {
@@ -1545,7 +1577,7 @@ read_statement(SerdReader* reader)
 	ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags };
 	Ref subj = 0;
 	bool ate_dot = false;
-	bool nested = false;
+	char s_type = false;
 	bool ret = true;
 	read_ws_star(reader);
 	switch (peek_byte(reader)) {
@@ -1556,16 +1588,38 @@ read_statement(SerdReader* reader)
 		TRY_RET(read_directive(reader));
 		read_ws_star(reader);
 		break;
+	case '{':
+		if (reader->syntax == SERD_TRIG) {
+			TRY_RET(read_wrappedGraph(reader, &ctx));
+			read_ws_star(reader);
+		} else {
+			return r_err(reader, SERD_ERR_BAD_SYNTAX, "graph in Turtle\n");
+		}
+		break;
 	default:
-		subj = read_subject(reader, ctx, &ctx.subject, &nested);
+		subj = read_subject(reader, ctx, &ctx.subject, &s_type);
 		if (!tokcmp(reader, ctx.subject, "base", 4)) {
 			ret = read_base(reader, true, false);
 		} else if (!tokcmp(reader, ctx.subject, "prefix", 6)) {
 			ret = read_prefixID(reader, true, false);
+		} else if (!tokcmp(reader, ctx.subject, "graph", 5)) {
+			read_ws_star(reader);
+			TRY_RET((ctx.graph = read_labelOrSubject(reader, ctx)));
+			read_ws_star(reader);
+			TRY_RET(read_wrappedGraph(reader, &ctx));
+			read_ws_star(reader);
+		} else if (read_ws_star(reader) && peek_byte(reader) == '{') {
+			if (s_type == '(' || (s_type == '[' && !*ctx.flags)) {
+				return false; // invalid graph with complex label
+			}
+			ctx.graph = subj;
+			ctx.subject = subj = 0;
+			TRY_RET(read_wrappedGraph(reader, &ctx));
+			read_ws_star(reader);
 		} else if (!subj) {
 			ret = r_err(reader, SERD_ERR_BAD_SYNTAX, "bad subject\n");
-		} else if (!read_triples(reader, ctx, &ate_dot) && !nested) {
-			ret = nested;
+		} else if (!read_triples(reader, ctx, &ate_dot)) {
+			ret = (s_type == '[');
 		} else if (!ate_dot) {
 			read_ws_star(reader);
 			ret = (eat_byte_check(reader, '.') == '.');
@@ -1573,7 +1627,6 @@ read_statement(SerdReader* reader)
 		pop_node(reader, subj);
 		break;
 	}
-	read_ws_star(reader); // remove?
 	return ret;
 }
 
@@ -1587,21 +1640,30 @@ read_turtleDoc(SerdReader* reader)
 }
 
 static bool
+read_trigDoc(SerdReader* reader)
+{
+	while (!reader->eof) {
+		TRY_RET(read_statement(reader));
+	}
+	return !reader->error;
+}
+
+static bool
 read_nquadsDoc(SerdReader* reader)
 {
 	while (!reader->eof) {
 		SerdStatementFlags flags = 0;
 		ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags };
 		bool ate_dot = false;
-		bool nested = false;
+		char s_type = false;
 		read_ws_star(reader);
 		if (peek_byte(reader) == '\0') {
 			reader->eof = true;
-			return !reader->error;
+			break;
 		}
 
 		// subject predicate object
-		if (!(ctx.subject = read_subject(reader, ctx, &ctx.subject, &nested)) ||
+		if (!(ctx.subject = read_subject(reader, ctx, &ctx.subject, &s_type)) ||
 		    !read_ws_star(reader) ||
 		    !(ctx.predicate = read_IRIREF(reader)) ||
 		    !read_ws_star(reader) ||
@@ -1642,6 +1704,7 @@ read_doc(SerdReader* reader)
 {
 	switch (reader->syntax) {
 	case SERD_NQUADS: return read_nquadsDoc(reader);
+	case SERD_TRIG: return read_trigDoc(reader);
 	default: return read_turtleDoc(reader);
 	}
 }
diff --git a/src/serdi.c b/src/serdi.c
index be52587c..80bba270 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -53,9 +53,9 @@ print_usage(const char* name, bool error)
 	fprintf(os, " -e Eat input one character at a time.\n");
 	fprintf(os, " -f Keep full URIs in input (don't qualify).\n");
 	fprintf(os, " -h Display this help and exit.\n");
-	fprintf(os, " -i SYNTAX Input syntax (turtle, ntriples, or nquads).\n");
+	fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n");
 	fprintf(os, " -l Lax (non-strict) parsing.\n");
-	fprintf(os, " -o SYNTAX Output syntax (turtle, ntriples, or nquads).\n");
+	fprintf(os, " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n");
 	fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n");
 	fprintf(os, " -q Suppress all output except data.\n");
 	fprintf(os, " -r ROOT_URI Keep relative URIs within ROOT_URI.\n");
@@ -73,6 +73,8 @@ set_syntax(SerdSyntax* syntax, const char* name)
 		*syntax = SERD_NTRIPLES;
 	} else if (!strcmp(name, "nquads")) {
 		*syntax = SERD_NQUADS;
+	} else if (!strcmp(name, "trig")) {
+		*syntax = SERD_TRIG;
 	} else {
 		SERDI_ERRORF("unknown syntax `%s'\n", name);
 		return false;
@@ -200,16 +202,17 @@ main(int argc, char** argv)
 	SerdEnv* env = serd_env_new(&base);
 
 	int output_style = 0;
-	if (output_syntax == SERD_NTRIPLES) {
+	if (output_syntax == SERD_NTRIPLES || output_syntax == SERD_NQUADS) {
 		output_style |= SERD_STYLE_ASCII;
-	} else {
+	} else if (output_syntax == SERD_TURTLE) {
 		output_style |= SERD_STYLE_ABBREVIATED;
 		if (!full_uris) {
 			output_style |= SERD_STYLE_CURIED;
 		}
 	}
 
-	if (input_syntax != SERD_NTRIPLES || (output_style & SERD_STYLE_CURIED)) {
+	if ((input_syntax == SERD_TURTLE || input_syntax == SERD_TRIG) ||
+	    (output_style & SERD_STYLE_CURIED)) {
 		// Base URI may change and/or we're abbreviating URIs, so must resolve
 		output_style |= SERD_STYLE_RESOLVED; // Base may chan
 	}
diff --git a/src/writer.c b/src/writer.c
index ce13d79b..5bb3bd0d 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -356,7 +356,7 @@ write_text(SerdWriter* writer, TextContext ctx,
 		case '"': len += sink("\\\"", 2, writer); continue;
 		default: break;
 		}
-		if (writer->syntax != SERD_NTRIPLES) {
+		if (writer->syntax == SERD_TURTLE) {
 			switch (in) {
 			case '\b': len += sink("\\b", 2, writer); continue;
 			case '\f': len += sink("\\f", 2, writer); continue;
@@ -438,7 +438,7 @@ typedef enum {
 static bool
 is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags)
 {
-	return (writer->syntax != SERD_NTRIPLES &&
+	return (writer->syntax == SERD_TURTLE &&
 	        ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) ||
 	         (field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN))));
 }
@@ -460,19 +460,19 @@ write_node(SerdWriter* writer,
 		if (is_inline_start(writer, field, flags)) {
 			++writer->indent;
 			write_sep(writer, SEP_ANON_BEGIN);
-		} else if (writer->syntax != SERD_NTRIPLES
+		} else if (writer->syntax == SERD_TURTLE
 		           && (field == FIELD_SUBJECT && (flags & SERD_LIST_S_BEGIN))) {
 			assert(writer->list_depth == 0);
 			copy_node(&writer->list_subj, node);
 			++writer->list_depth;
 			++writer->indent;
 			write_sep(writer, SEP_LIST_BEGIN);
-		} else if (writer->syntax != SERD_NTRIPLES
+		} else if (writer->syntax == SERD_TURTLE
 		           && (field == FIELD_OBJECT && (flags & SERD_LIST_O_BEGIN))) {
 			++writer->indent;
 			++writer->list_depth;
 			write_sep(writer, SEP_LIST_BEGIN);
-		} else if (writer->syntax != SERD_NTRIPLES
+		} else if (writer->syntax == SERD_TURTLE
 		           && ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S)) ||
 		               (field == FIELD_OBJECT && (flags & SERD_EMPTY_O)))) {
 			sink("[]", 2, writer);
@@ -504,6 +504,7 @@ write_node(SerdWriter* writer,
 			sink(">", 1, writer);
 			break;
 		case SERD_TURTLE:
+		case SERD_TRIG:
 			if (is_inline_start(writer, field, flags)) {
 				++writer->indent;
 				write_sep(writer, SEP_ANON_BEGIN);
@@ -536,7 +537,7 @@ write_node(SerdWriter* writer,
 				break;
 			}
 		}
-		if (writer->syntax != SERD_NTRIPLES
+		if (writer->syntax == SERD_TURTLE
 		    && (node->flags & (SERD_HAS_NEWLINE|SERD_HAS_QUOTE))) {
 			sink("\"\"\"", 3, writer);
 			write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes);
@@ -585,7 +586,7 @@ write_node(SerdWriter* writer,
 			bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri);
 			SerdURI* root = rooted ? &writer->root_uri : & writer->base_uri;
 			if (!uri_is_under(&abs_uri, root) ||
-			    writer->syntax == SERD_NTRIPLES) {
+			    writer->syntax != SERD_TURTLE) {
 				serd_uri_serialise(&abs_uri, uri_sink, writer);
 			} else {
 				serd_uri_serialise_relative(
@@ -761,7 +762,7 @@ SerdStatus
 serd_writer_end_anon(SerdWriter* writer,
                      const SerdNode* node)
 {
-	if (writer->syntax == SERD_NTRIPLES) {
+	if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) {
 		return SERD_SUCCESS;
 	}
 	if (serd_stack_is_empty(&writer->anon_stack) || writer->indent == 0) {
@@ -858,7 +859,7 @@ serd_writer_set_base_uri(SerdWriter* writer,
 	if (!serd_env_set_base_uri(writer->env, uri)) {
 		serd_env_get_base_uri(writer->env, &writer->base_uri);
 
-		if (writer->syntax != SERD_NTRIPLES) {
+		if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
 			if (writer->context.graph.type || writer->context.subject.type) {
 				sink(" .\n\n", 4, writer);
 				reset_context(writer, false);
@@ -896,7 +897,7 @@ serd_writer_set_prefix(SerdWriter* writer,
                        const SerdNode* uri)
 {
 	if (!serd_env_set_prefix(writer->env, name, uri)) {
-		if (writer->syntax != SERD_NTRIPLES) {
+		if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
 			if (writer->context.graph.type || writer->context.subject.type) {
 				sink(" .\n\n", 4, writer);
 				reset_context(writer, false);