about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2017-01-06 14:48:03 -0500
committer David Robillard <d@drobilla.net> 2017-01-06 21:39:01 -0500
commit 5475750ecf496c774a082464b3e60f5adce9cc8a (patch)
tree 57e844edc09637096d1b5ea16dcc5ebb69f88345 /src
parent 52590dbeb23100320417d6f72e20fadf215479e5 (diff)
download serd-5475750ecf496c774a082464b3e60f5adce9cc8a.tar.gz
serd-5475750ecf496c774a082464b3e60f5adce9cc8a.tar.bz2
serd-5475750ecf496c774a082464b3e60f5adce9cc8a.zip
Add support for reading TriG
Diffstat (limited to 'src')
-rw-r--r-- src/reader.c 137
-rw-r--r-- src/serdi.c 13
-rw-r--r-- src/writer.c 21
3 files changed, 119 insertions, 52 deletions
diff --git a/src/reader.c b/src/reader.c
index edeb8956..af5fb85c 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -101,13 +101,13 @@ struct SerdReaderImpl {
static inline bool
supports_fancy_literals(const SerdReader* reader)
{
- return reader->syntax == SERD_TURTLE;
+ return reader->syntax == SERD_TURTLE || reader->syntax == SERD_TRIG;
}
static inline bool
supports_relative_iris(const SerdReader* reader)
{
- return reader->syntax == SERD_TURTLE;
+ return reader->syntax == SERD_TURTLE || reader->syntax == SERD_TRIG;
}
static int
@@ -156,7 +156,7 @@ eat_byte_safe(SerdReader* reader, const uint8_t byte)
{
assert(peek_byte(reader) == byte);
switch (byte) {
- case '\0': reader->eof = true; break;
+ case '\0': reader->eof = (byte != '\0'); break;
case '\n': ++reader->cur.line; reader->cur.col = 0; break;
default: ++reader->cur.col;
}
@@ -520,13 +520,6 @@ read_ws_star(SerdReader* reader)
}
static inline bool
-read_ws_plus(SerdReader* reader)
-{
- TRY_RET(read_ws(reader));
- return read_ws_star(reader);
-}
-
-static inline bool
peek_delim(SerdReader* reader, const char delim)
{
read_ws_star(reader);
@@ -876,17 +869,13 @@ read_IRIREF(SerdReader* reader)
static bool
read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix, bool* ate_dot)
{
- if (read_prefix) {
- if (read_PN_PREFIX(reader, dest) > SERD_FAILURE) {
- return false;
- }
- }
-
- if (eat_byte_check(reader, ':') != ':') {
+ if (read_prefix && read_PN_PREFIX(reader, dest) > SERD_FAILURE) {
+ return false;
+ } else if (peek_byte(reader) != ':') {
return false;
}
- push_byte(reader, dest, ':');
+ push_byte(reader, dest, eat_byte_safe(reader, ':'));
return read_PN_LOCAL(reader, dest, ate_dot) <= SERD_FAILURE;
}
@@ -1307,7 +1296,7 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot)
switch (c = peek_byte(reader)) {
case 0:
return false;
- case '.': case ']':
+ case '.': case ']': case '}':
return true;
case ';':
eat_byte_safe(reader, c);
@@ -1396,16 +1385,14 @@ read_collection(SerdReader* reader, ReadContext ctx, Ref* dest)
}
static Ref
-read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, bool* nested)
+read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, char* s_type)
{
bool ate_dot = false;
- switch (peek_byte(reader)) {
+ switch ((*s_type = peek_byte(reader))) {
case '[':
- *nested = true;
read_anon(reader, ctx, true, dest);
break;
case '(':
- *nested = true;
read_collection(reader, ctx, dest);
break;
case '_':
@@ -1417,15 +1404,36 @@ read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, bool* nested)
return ate_dot ? pop_node(reader, *dest) : *dest;
}
+static Ref
+read_labelOrSubject(SerdReader* reader, ReadContext ctx)
+{
+ Ref subject = 0;
+ bool ate_dot = false;
+ switch (peek_byte(reader)) {
+ case '[':
+ eat_byte_safe(reader, '[');
+ read_ws_star(reader);
+ TRY_RET(eat_byte_check(reader, ']'));
+ return blank_id(reader);
+ case '_':
+ return read_BLANK_NODE_LABEL(reader, &ate_dot);
+ default:
+ read_iri(reader, &subject, &ate_dot);
+ }
+ return subject;
+}
+
static bool
read_triples(SerdReader* reader, ReadContext ctx, bool* ate_dot)
{
bool ret = false;
if (ctx.subject) {
- TRY_RET(read_ws_plus(reader));
- if (peek_byte(reader) == '.') {
- eat_byte_safe(reader, '.');
- *ate_dot = true;
+ read_ws_star(reader);
+ switch (peek_byte(reader)) {
+ case '.':
+ *ate_dot = eat_byte_safe(reader, '.');
+ return false;
+ case '}':
return false;
}
ret = read_predicateObjectList(reader, ctx, ate_dot);
@@ -1442,7 +1450,7 @@ read_base(SerdReader* reader, bool sparql, bool token)
}
Ref uri;
- TRY_RET(read_ws_plus(reader));
+ read_ws_star(reader);
TRY_RET(uri = read_IRIREF(reader));
if (reader->base_sink) {
reader->base_sink(reader->handle, deref(reader, uri));
@@ -1466,7 +1474,7 @@ read_prefixID(SerdReader* reader, bool sparql, bool token)
TRY_RET(eat_string(reader, "prefix", 6));
}
- TRY_RET(read_ws_plus(reader));
+ read_ws_star(reader);
bool ret = true;
Ref name = push_node(reader, SERD_LITERAL, "", 0);
if (read_PN_PREFIX(reader, name) > SERD_FAILURE) {
@@ -1521,6 +1529,30 @@ read_directive(SerdReader* reader)
return true;
}
+static bool
+read_wrappedGraph(SerdReader* reader, ReadContext* ctx)
+{
+ bool ate_dot = false;
+ char s_type = 0;
+ TRY_RET(eat_byte_check(reader, '{'));
+ read_ws_star(reader);
+ while (peek_byte(reader) != '}') {
+ ctx->subject = 0;
+ Ref subj = read_subject(reader, *ctx, &ctx->subject, &s_type);
+ if (!subj ||
+ (!read_triples(reader, *ctx, &ate_dot) && s_type != '[')) {
+ return false;
+ }
+ pop_node(reader, subj);
+ read_ws_star(reader);
+ if (peek_byte(reader) == '.') {
+ eat_byte_safe(reader, '.');
+ }
+ read_ws_star(reader);
+ }
+ return eat_byte_check(reader, '}');
+}
+
static int
tokcmp(SerdReader* reader, Ref ref, const char* tok, size_t n)
{
@@ -1545,7 +1577,7 @@ read_statement(SerdReader* reader)
ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags };
Ref subj = 0;
bool ate_dot = false;
- bool nested = false;
+ char s_type = false;
bool ret = true;
read_ws_star(reader);
switch (peek_byte(reader)) {
@@ -1556,16 +1588,38 @@ read_statement(SerdReader* reader)
TRY_RET(read_directive(reader));
read_ws_star(reader);
break;
+ case '{':
+ if (reader->syntax == SERD_TRIG) {
+ TRY_RET(read_wrappedGraph(reader, &ctx));
+ read_ws_star(reader);
+ } else {
+ return r_err(reader, SERD_ERR_BAD_SYNTAX, "graph in Turtle\n");
+ }
+ break;
default:
- subj = read_subject(reader, ctx, &ctx.subject, &nested);
+ subj = read_subject(reader, ctx, &ctx.subject, &s_type);
if (!tokcmp(reader, ctx.subject, "base", 4)) {
ret = read_base(reader, true, false);
} else if (!tokcmp(reader, ctx.subject, "prefix", 6)) {
ret = read_prefixID(reader, true, false);
+ } else if (!tokcmp(reader, ctx.subject, "graph", 5)) {
+ read_ws_star(reader);
+ TRY_RET((ctx.graph = read_labelOrSubject(reader, ctx)));
+ read_ws_star(reader);
+ TRY_RET(read_wrappedGraph(reader, &ctx));
+ read_ws_star(reader);
+ } else if (read_ws_star(reader) && peek_byte(reader) == '{') {
+ if (s_type == '(' || (s_type == '[' && !*ctx.flags)) {
+ return false; // invalid graph with complex label
+ }
+ ctx.graph = subj;
+ ctx.subject = subj = 0;
+ TRY_RET(read_wrappedGraph(reader, &ctx));
+ read_ws_star(reader);
} else if (!subj) {
ret = r_err(reader, SERD_ERR_BAD_SYNTAX, "bad subject\n");
- } else if (!read_triples(reader, ctx, &ate_dot) && !nested) {
- ret = nested;
+ } else if (!read_triples(reader, ctx, &ate_dot)) {
+ ret = (s_type == '[');
} else if (!ate_dot) {
read_ws_star(reader);
ret = (eat_byte_check(reader, '.') == '.');
@@ -1573,7 +1627,6 @@ read_statement(SerdReader* reader)
pop_node(reader, subj);
break;
}
- read_ws_star(reader); // remove?
return ret;
}
@@ -1587,21 +1640,30 @@ read_turtleDoc(SerdReader* reader)
}
static bool
+read_trigDoc(SerdReader* reader)
+{
+ while (!reader->eof) {
+ TRY_RET(read_statement(reader));
+ }
+ return !reader->error;
+}
+
+static bool
read_nquadsDoc(SerdReader* reader)
{
while (!reader->eof) {
SerdStatementFlags flags = 0;
ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags };
bool ate_dot = false;
- bool nested = false;
+ char s_type = false;
read_ws_star(reader);
if (peek_byte(reader) == '\0') {
reader->eof = true;
- return !reader->error;
+ break;
}
// subject predicate object
- if (!(ctx.subject = read_subject(reader, ctx, &ctx.subject, &nested)) ||
+ if (!(ctx.subject = read_subject(reader, ctx, &ctx.subject, &s_type)) ||
!read_ws_star(reader) ||
!(ctx.predicate = read_IRIREF(reader)) ||
!read_ws_star(reader) ||
@@ -1642,6 +1704,7 @@ read_doc(SerdReader* reader)
{
switch (reader->syntax) {
case SERD_NQUADS: return read_nquadsDoc(reader);
+ case SERD_TRIG: return read_trigDoc(reader);
default: return read_turtleDoc(reader);
}
}
diff --git a/src/serdi.c b/src/serdi.c
index be52587c..80bba270 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -53,9 +53,9 @@ print_usage(const char* name, bool error)
fprintf(os, " -e Eat input one character at a time.\n");
fprintf(os, " -f Keep full URIs in input (don't qualify).\n");
fprintf(os, " -h Display this help and exit.\n");
- fprintf(os, " -i SYNTAX Input syntax (turtle, ntriples, or nquads).\n");
+ fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n");
fprintf(os, " -l Lax (non-strict) parsing.\n");
- fprintf(os, " -o SYNTAX Output syntax (turtle, ntriples, or nquads).\n");
+ fprintf(os, " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n");
fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n");
fprintf(os, " -q Suppress all output except data.\n");
fprintf(os, " -r ROOT_URI Keep relative URIs within ROOT_URI.\n");
@@ -73,6 +73,8 @@ set_syntax(SerdSyntax* syntax, const char* name)
*syntax = SERD_NTRIPLES;
} else if (!strcmp(name, "nquads")) {
*syntax = SERD_NQUADS;
+ } else if (!strcmp(name, "trig")) {
+ *syntax = SERD_TRIG;
} else {
SERDI_ERRORF("unknown syntax `%s'\n", name);
return false;
@@ -200,16 +202,17 @@ main(int argc, char** argv)
SerdEnv* env = serd_env_new(&base);
int output_style = 0;
- if (output_syntax == SERD_NTRIPLES) {
+ if (output_syntax == SERD_NTRIPLES || output_syntax == SERD_NQUADS) {
output_style |= SERD_STYLE_ASCII;
- } else {
+ } else if (output_syntax == SERD_TURTLE) {
output_style |= SERD_STYLE_ABBREVIATED;
if (!full_uris) {
output_style |= SERD_STYLE_CURIED;
}
}
- if (input_syntax != SERD_NTRIPLES || (output_style & SERD_STYLE_CURIED)) {
+ if ((input_syntax == SERD_TURTLE || input_syntax == SERD_TRIG) ||
+ (output_style & SERD_STYLE_CURIED)) {
// Base URI may change and/or we're abbreviating URIs, so must resolve
output_style |= SERD_STYLE_RESOLVED; // Base may change
}
diff --git a/src/writer.c b/src/writer.c
index ce13d79b..5bb3bd0d 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -356,7 +356,7 @@ write_text(SerdWriter* writer, TextContext ctx,
case '"': len += sink("\\\"", 2, writer); continue;
default: break;
}
- if (writer->syntax != SERD_NTRIPLES) {
+ if (writer->syntax == SERD_TURTLE) {
switch (in) {
case '\b': len += sink("\\b", 2, writer); continue;
case '\f': len += sink("\\f", 2, writer); continue;
@@ -438,7 +438,7 @@ typedef enum {
static bool
is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags)
{
- return (writer->syntax != SERD_NTRIPLES &&
+ return (writer->syntax == SERD_TURTLE &&
((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) ||
(field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN))));
}
@@ -460,19 +460,19 @@ write_node(SerdWriter* writer,
if (is_inline_start(writer, field, flags)) {
++writer->indent;
write_sep(writer, SEP_ANON_BEGIN);
- } else if (writer->syntax != SERD_NTRIPLES
+ } else if (writer->syntax == SERD_TURTLE
&& (field == FIELD_SUBJECT && (flags & SERD_LIST_S_BEGIN))) {
assert(writer->list_depth == 0);
copy_node(&writer->list_subj, node);
++writer->list_depth;
++writer->indent;
write_sep(writer, SEP_LIST_BEGIN);
- } else if (writer->syntax != SERD_NTRIPLES
+ } else if (writer->syntax == SERD_TURTLE
&& (field == FIELD_OBJECT && (flags & SERD_LIST_O_BEGIN))) {
++writer->indent;
++writer->list_depth;
write_sep(writer, SEP_LIST_BEGIN);
- } else if (writer->syntax != SERD_NTRIPLES
+ } else if (writer->syntax == SERD_TURTLE
&& ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S))
|| (field == FIELD_OBJECT && (flags & SERD_EMPTY_O)))) {
sink("[]", 2, writer);
@@ -504,6 +504,7 @@ write_node(SerdWriter* writer,
sink(">", 1, writer);
break;
case SERD_TURTLE:
+ case SERD_TRIG:
if (is_inline_start(writer, field, flags)) {
++writer->indent;
write_sep(writer, SEP_ANON_BEGIN);
@@ -536,7 +537,7 @@ write_node(SerdWriter* writer,
break;
}
}
- if (writer->syntax != SERD_NTRIPLES
+ if (writer->syntax == SERD_TURTLE
&& (node->flags & (SERD_HAS_NEWLINE|SERD_HAS_QUOTE))) {
sink("\"\"\"", 3, writer);
write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes);
@@ -585,7 +586,7 @@ write_node(SerdWriter* writer,
bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri);
SerdURI* root = rooted ? &writer->root_uri : & writer->base_uri;
if (!uri_is_under(&abs_uri, root) ||
- writer->syntax == SERD_NTRIPLES) {
+ writer->syntax != SERD_TURTLE) {
serd_uri_serialise(&abs_uri, uri_sink, writer);
} else {
serd_uri_serialise_relative(
@@ -761,7 +762,7 @@ SerdStatus
serd_writer_end_anon(SerdWriter* writer,
const SerdNode* node)
{
- if (writer->syntax == SERD_NTRIPLES) {
+ if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) {
return SERD_SUCCESS;
}
if (serd_stack_is_empty(&writer->anon_stack) || writer->indent == 0) {
@@ -858,7 +859,7 @@ serd_writer_set_base_uri(SerdWriter* writer,
if (!serd_env_set_base_uri(writer->env, uri)) {
serd_env_get_base_uri(writer->env, &writer->base_uri);
- if (writer->syntax != SERD_NTRIPLES) {
+ if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
if (writer->context.graph.type || writer->context.subject.type) {
sink(" .\n\n", 4, writer);
reset_context(writer, false);
@@ -896,7 +897,7 @@ serd_writer_set_prefix(SerdWriter* writer,
const SerdNode* uri)
{
if (!serd_env_set_prefix(writer->env, name, uri)) {
- if (writer->syntax != SERD_NTRIPLES) {
+ if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
if (writer->context.graph.type || writer->context.subject.type) {
sink(" .\n\n", 4, writer);
reset_context(writer, false);