aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2017-01-05 16:22:52 -0500
committerDavid Robillard <d@drobilla.net>2017-01-05 16:22:52 -0500
commit0c71ed6ce02ee7ff65a5da2cf863c5712a67cb4b (patch)
tree217344e3fac3b1f6cb8e116ef257cfa39835529f /src
parent4c6600603704454df7c3a21039111f195d2d0ff1 (diff)
downloadserd-0c71ed6ce02ee7ff65a5da2cf863c5712a67cb4b.tar.gz
serd-0c71ed6ce02ee7ff65a5da2cf863c5712a67cb4b.tar.bz2
serd-0c71ed6ce02ee7ff65a5da2cf863c5712a67cb4b.zip
Add support for NQuads
Diffstat (limited to 'src')
-rw-r--r--src/reader.c164
-rw-r--r--src/serdi.c10
-rw-r--r--src/writer.c11
3 files changed, 160 insertions, 25 deletions
diff --git a/src/reader.c b/src/reader.c
index 0cfa1575..ddb8551b 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -1,5 +1,5 @@
/*
- Copyright 2011-2016 David Robillard <http://drobilla.net>
+ Copyright 2011-2017 David Robillard <http://drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -55,6 +55,9 @@ typedef struct {
Ref graph;
Ref subject;
Ref predicate;
+ Ref object;
+ Ref datatype;
+ Ref language;
SerdStatementFlags* flags;
} ReadContext;
@@ -95,6 +98,18 @@ struct SerdReaderImpl {
#endif
};
+static inline bool
+supports_fancy_literals(const SerdReader* reader)
+{
+ return reader->syntax == SERD_TURTLE;
+}
+
+static inline bool
+supports_relative_iris(const SerdReader* reader)
+{
+ return reader->syntax == SERD_TURTLE;
+}
+
static int
r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...)
{
@@ -618,6 +633,11 @@ read_String(SerdReader* reader, SerdNodeFlags* flags)
return push_node(reader, SERD_LITERAL, "", 0);
}
+ if (!supports_fancy_literals(reader)) {
+ return r_err(reader, SERD_ERR_BAD_SYNTAX,
+ "syntax does not support long literals\n");
+ }
+
eat_byte_safe(reader, q3);
return read_STRING_LITERAL_LONG(reader, flags, q1);
}
@@ -781,14 +801,35 @@ read_LANGTAG(SerdReader* reader)
return ref;
}
+typedef enum { PREFIX, GOOD, BAD} SchemeState;
+
+static inline bool
+check_scheme(SerdReader* reader, uint8_t c, SchemeState* state)
+{
+ if (!supports_relative_iris(reader) && *state == PREFIX) {
+ if (c == ':') {
+ *state = GOOD;
+ } else if (!isalpha(c)) {
+ *state = BAD;
+ return r_err(reader, SERD_ERR_BAD_SYNTAX,
+ "syntax does not support relative IRIs\n");
+ }
+ }
+ return true;
+}
+
static Ref
read_IRIREF(SerdReader* reader)
{
TRY_RET(eat_byte_check(reader, '<'));
- Ref ref = push_node(reader, SERD_URI, "", 0);
- uint32_t code;
+ Ref ref = push_node(reader, SERD_URI, "", 0);
+ SchemeState scheme = PREFIX;
+ uint32_t code;
while (true) {
const uint8_t c = peek_byte(reader);
+ if (!check_scheme(reader, c, &scheme)) {
+ return pop_node(reader, ref);
+ }
switch (c) {
case '"': case '<': case '^': case '`': case '{': case '|': case '}':
r_err(reader, SERD_ERR_BAD_SYNTAX,
@@ -1141,10 +1182,11 @@ read_anon(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest)
return (eat_byte_check(reader, ']') == ']');
}
-// Recurses, calling statement_sink for every statement encountered.
-// Leaves stack in original calling state (i.e. pops everything it pushes).
+/* If emit is true: recurses, calling statement_sink for every statement
+ encountered, and leaves stack in original calling state (i.e. pops
+ everything it pushes). */
static bool
-read_object(SerdReader* reader, ReadContext ctx, bool* ate_dot)
+read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
{
static const char* const XSD_BOOLEAN = NS_XSD "boolean";
static const size_t XSD_BOOLEAN_LEN = 40;
@@ -1154,24 +1196,31 @@ read_object(SerdReader* reader, ReadContext ctx, bool* ate_dot)
#endif
bool ret = false;
- bool emit = (ctx.subject != 0);
+ bool simple = (ctx->subject != 0);
SerdNode* node = NULL;
Ref o = 0;
Ref datatype = 0;
Ref lang = 0;
uint32_t flags = 0;
const uint8_t c = peek_byte(reader);
+ if (!supports_fancy_literals(reader)) {
+ switch (c) {
+ case '"': case ':': case '<': case '_': break;
+ default: return r_err(reader, SERD_ERR_BAD_SYNTAX,
+ "expected: ':', '<', or '_'\n");
+ }
+ }
switch (c) {
case '\0':
case ')':
return false;
case '[':
- emit = false;
- TRY_THROW(ret = read_anon(reader, ctx, false, &o));
+ simple = false;
+ TRY_THROW(ret = read_anon(reader, *ctx, false, &o));
break;
case '(':
- emit = false;
- TRY_THROW(ret = read_collection(reader, ctx, &o));
+ simple = false;
+ TRY_THROW(ret = read_collection(reader, *ctx, &o));
break;
case '_':
TRY_THROW(ret = (o = read_BLANK_NODE_LABEL(reader, ate_dot)));
@@ -1206,9 +1255,17 @@ read_object(SerdReader* reader, ReadContext ctx, bool* ate_dot)
}
}
- if (ret && emit) {
+ if (simple) {
deref(reader, o)->flags = flags;
- ret = emit_statement(reader, ctx, o, datatype, lang);
+ }
+
+ if (ret && emit && simple) {
+ ret = emit_statement(reader, *ctx, o, datatype, lang);
+ } else if (ret && !emit) {
+ ctx->object = o;
+ ctx->datatype = datatype;
+ ctx->language = lang;
+ return true;
}
except:
@@ -1224,9 +1281,9 @@ except:
static bool
read_objectList(SerdReader* reader, ReadContext ctx, bool* ate_dot)
{
- TRY_RET(read_object(reader, ctx, ate_dot));
+ TRY_RET(read_object(reader, &ctx, true, ate_dot));
while (!*ate_dot && eat_delim(reader, ',')) {
- TRY_RET(read_object(reader, ctx, ate_dot));
+ TRY_RET(read_object(reader, &ctx, true, ate_dot));
}
return true;
}
@@ -1311,7 +1368,7 @@ read_collection(SerdReader* reader, ReadContext ctx, Ref* dest)
// _:node rdf:first object
ctx.predicate = reader->rdf_first;
bool ate_dot = false;
- if (!read_object(reader, ctx, &ate_dot) || ate_dot) {
+ if (!read_object(reader, &ctx, true, &ate_dot) || ate_dot) {
return end_collection(reader, ctx, n1, n2, false);
}
@@ -1454,7 +1511,7 @@ static bool
read_statement(SerdReader* reader)
{
SerdStatementFlags flags = 0;
- ReadContext ctx = { 0, 0, 0, &flags };
+ ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags };
read_ws_star(reader);
bool ate_dot = false;
switch (peek_byte(reader)) {
@@ -1489,6 +1546,75 @@ read_turtleDoc(SerdReader* reader)
return !reader->error;
}
+static bool
+read_nquadsDoc(SerdReader* reader)
+{
+ while (!reader->eof) {
+ SerdStatementFlags flags = 0;
+ ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags };
+ bool ate_dot = false;
+ bool nested = false;
+ read_ws_star(reader);
+ if (peek_byte(reader) == '\0') {
+ reader->eof = true;
+ return !reader->error;
+ }
+
+ // subject
+ if (!(ctx.subject = read_subject(reader, ctx, &nested))) {
+ return false;
+ }
+
+ // predicate
+ TRY_RET(read_ws_star(reader));
+ if (!(ctx.predicate = read_IRIREF(reader))) {
+ return false;
+ }
+
+ // object
+ TRY_RET(read_ws_star(reader));
+ if (!read_object(reader, &ctx, false, &ate_dot)) {
+ return false;
+ }
+
+ if (!ate_dot) {
+ // graphLabel?
+ TRY_RET(read_ws_star(reader));
+ switch (peek_byte(reader)) {
+ case '.':
+ break;
+ case '_':
+ ctx.graph = read_BLANK_NODE_LABEL(reader, &ate_dot);
+ break;
+ default:
+ if (!(ctx.graph = read_IRIREF(reader))) {
+ return false;
+ }
+ }
+
+ // Terminating '.'
+ TRY_RET(read_ws_star(reader));
+ eat_byte_check(reader, '.');
+ }
+
+ TRY_RET(emit_statement(reader, ctx, ctx.object, ctx.datatype, ctx.language));
+ pop_node(reader, ctx.graph);
+ pop_node(reader, ctx.language);
+ pop_node(reader, ctx.datatype);
+ pop_node(reader, ctx.object);
+ }
+ return !reader->error;
+}
+
+static bool
+read_doc(SerdReader* reader)
+{
+ switch (reader->syntax) {
+ case SERD_NQUADS: return read_nquadsDoc(reader);
+ default: return read_turtleDoc(reader);
+ }
+}
+
SERD_API
SerdReader*
serd_reader_new(SerdSyntax syntax,
@@ -1686,7 +1812,7 @@ serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
{
SerdStatus st = serd_reader_start_stream(me, file, name, true);
if (!st) {
- st = read_turtleDoc(me) ? SERD_SUCCESS : SERD_ERR_UNKNOWN;
+ st = read_doc(me) ? SERD_SUCCESS : SERD_ERR_UNKNOWN;
serd_reader_end_stream(me);
}
return st;
@@ -1706,7 +1832,7 @@ serd_reader_read_string(SerdReader* me, const uint8_t* utf8)
me->eof = false;
skip_bom(me);
- const bool ret = read_turtleDoc(me);
+ const bool ret = read_doc(me);
me->read_buf = NULL;
return ret ? SERD_SUCCESS : SERD_ERR_UNKNOWN;
diff --git a/src/serdi.c b/src/serdi.c
index 953a3c0c..be52587c 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -1,5 +1,5 @@
/*
- Copyright 2011-2016 David Robillard <http://drobilla.net>
+ Copyright 2011-2017 David Robillard <http://drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -33,7 +33,7 @@ static int
print_version(void)
{
printf("serdi " SERD_VERSION " <http://drobilla.net/software/serd>\n");
- printf("Copyright 2011-2016 David Robillard <http://drobilla.net>.\n"
+ printf("Copyright 2011-2017 David Robillard <http://drobilla.net>.\n"
"License: <http://www.opensource.org/licenses/isc>\n"
"This is free software; you are free to change and redistribute it."
"\nThere is NO WARRANTY, to the extent permitted by law.\n");
@@ -53,9 +53,9 @@ print_usage(const char* name, bool error)
fprintf(os, " -e Eat input one character at a time.\n");
fprintf(os, " -f Keep full URIs in input (don't qualify).\n");
fprintf(os, " -h Display this help and exit.\n");
- fprintf(os, " -i SYNTAX Input syntax (`turtle' or `ntriples').\n");
+ fprintf(os, " -i SYNTAX Input syntax (turtle, ntriples, or nquads).\n");
fprintf(os, " -l Lax (non-strict) parsing.\n");
- fprintf(os, " -o SYNTAX Output syntax (`turtle' or `ntriples').\n");
+ fprintf(os, " -o SYNTAX Output syntax (turtle, ntriples, or nquads).\n");
fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n");
fprintf(os, " -q Suppress all output except data.\n");
fprintf(os, " -r ROOT_URI Keep relative URIs within ROOT_URI.\n");
@@ -71,6 +71,8 @@ set_syntax(SerdSyntax* syntax, const char* name)
*syntax = SERD_TURTLE;
} else if (!strcmp(name, "ntriples")) {
*syntax = SERD_NTRIPLES;
+ } else if (!strcmp(name, "nquads")) {
+ *syntax = SERD_NQUADS;
} else {
SERDI_ERRORF("unknown syntax `%s'\n", name);
return false;
diff --git a/src/writer.c b/src/writer.c
index 0d1ebded..59c05420 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -1,5 +1,5 @@
/*
- Copyright 2011-2016 David Robillard <http://drobilla.net>
+ Copyright 2011-2017 David Robillard <http://drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -431,7 +431,8 @@ typedef enum {
FIELD_NONE,
FIELD_SUBJECT,
FIELD_PREDICATE,
- FIELD_OBJECT
+ FIELD_OBJECT,
+ FIELD_GRAPH
} Field;
static bool
@@ -491,6 +492,7 @@ write_node(SerdWriter* writer,
case SERD_CURIE:
switch (writer->syntax) {
case SERD_NTRIPLES:
+ case SERD_NQUADS:
if (serd_env_expand(writer->env, node, &uri_prefix, &suffix)) {
w_err(writer, SERD_ERR_BAD_CURIE,
"undefined namespace prefix `%s'\n", node->buf);
@@ -661,11 +663,16 @@ serd_writer_write_statement(SerdWriter* writer,
switch (writer->syntax) {
case SERD_NTRIPLES:
+ case SERD_NQUADS:
TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags));
sink(" ", 1, writer);
TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags));
sink(" ", 1, writer);
TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags));
+ if (writer->syntax == SERD_NQUADS && graph) {
+ sink(" ", 1, writer);
+ TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags));
+ }
sink(" .\n", 3, writer);
return SERD_SUCCESS;
default: