aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2021-03-01 19:10:36 -0500
committer David Robillard <d@drobilla.net> 2021-03-08 23:23:06 -0500
commit c7a5af0235706bc0624a236fae03fc728144234f (patch)
tree ea682746f8d955f9e8ae3f549848be54f91cd6fd /src
parent 8a13a270d9150e0cd14a049c76c601d09ee539bf (diff)
download serd-c7a5af0235706bc0624a236fae03fc728144234f.tar.gz
serd-c7a5af0235706bc0624a236fae03fc728144234f.tar.bz2
serd-c7a5af0235706bc0624a236fae03fc728144234f.zip
Expand URIs in reader
Diffstat (limited to 'src')
-rw-r--r-- src/n3.c 139
-rw-r--r-- src/node_syntax.c 16
-rw-r--r-- src/reader.c 4
-rw-r--r-- src/reader.h 6
-rw-r--r-- src/serdi.c 6
-rw-r--r-- src/string.c 2
-rw-r--r-- src/writer.c 62
7 files changed, 172 insertions, 63 deletions
diff --git a/src/n3.c b/src/n3.c
index 6036bc7f..22c24fd7 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -15,6 +15,7 @@
*/
#include "byte_source.h"
+#include "env.h"
#include "namespaces.h"
#include "node.h"
#include "reader.h"
@@ -707,6 +708,63 @@ read_IRIREF_scheme(SerdReader* reader, SerdNode* dest)
return SERD_FAILURE;
}
+/**
+   Context handed to write_to_stack() through its `stream` argument.
+
+   `status` records the first error hit while writing.  Once it is set,
+   write_to_stack() skips further writes, so it is never reset to success.
+*/
+typedef struct {
+  SerdReader* reader; ///< Reader whose stack receives the bytes
+  SerdNode*   node;   ///< Node on the reader stack that is appended to
+  SerdStatus  status; ///< First error from push_bytes(), or SERD_SUCCESS
+} WriteNodeContext;
+
+/**
+   Write callback that appends `size * nmemb` bytes to a node on the stack.
+
+   @param buf Bytes to append.
+   @param size Size of one element in bytes.
+   @param nmemb Number of elements in `buf`.
+   @param stream Pointer to a WriteNodeContext.
+
+   @return Always `nmemb`.  Failure is reported through the `status` field
+   of the context rather than through the return value, so the caller must
+   check that field after writing.
+*/
+static size_t
+write_to_stack(const void* const SERD_NONNULL buf,
+               const size_t                   size,
+               const size_t                   nmemb,
+               void* const SERD_NONNULL       stream)
+{
+  WriteNodeContext* const ctx  = (WriteNodeContext*)stream;
+  const uint8_t* const    utf8 = (const uint8_t*)buf;
+
+  // Keep the first error: a later, smaller write that happens to fit must
+  // not reset the status to success and hide a truncated result
+  if (!ctx->status) {
+    ctx->status = push_bytes(ctx->reader, ctx->node, utf8, nmemb * size);
+  }
+
+  return nmemb;
+}
+
+/**
+   Expand a relative URI reference in `dest` to a full URI, in place.
+
+   If the string of `dest` already has a scheme, nothing is changed.
+   Otherwise the reference is resolved against the base URI of the reader's
+   environment, the result is written to a temporary node pushed on the
+   reader stack, and that node is then moved over `dest`.
+
+   @param reader Reader that owns the stack and the environment.
+   @param dest Node holding the URI reference; overwritten on success.
+   @param string_start_offset Stack offset supplied by the caller; the stack
+   is popped back to this offset plus the length of the resolved URI.
+
+   @return SERD_SUCCESS, SERD_ERR_BAD_URI if the resolved URI still has no
+   scheme, SERD_ERR_OVERFLOW if the temporary node can not be pushed, or the
+   error recorded while writing the resolved URI to the stack.
+*/
+static SerdStatus
+resolve_IRIREF(SerdReader* const reader,
+               SerdNode* const   dest,
+               const size_t      string_start_offset)
+{
+  // If the URI is already absolute, we don't need to do anything
+  SerdURIView uri = serd_parse_uri(serd_node_string(dest));
+  if (uri.scheme.len) {
+    return SERD_SUCCESS;
+  }
+
+  // Resolve relative URI reference to a full URI
+  uri = serd_resolve_uri(uri, serd_env_base_uri_view(reader->env));
+  if (!uri.scheme.len) {
+    // Report the failure like the other expansion errors in this file do,
+    // rather than returning a bare status with no diagnostic
+    return r_err(reader,
+                 SERD_ERR_BAD_URI,
+                 "failed to resolve relative URI reference\n");
+  }
+
+  // Push a new temporary node for constructing the resolved URI
+  SerdNode* const temp = push_node(reader, SERD_URI, "", 0);
+  if (!temp) {
+    return SERD_ERR_OVERFLOW;
+  }
+
+  // Write resolved URI to the temporary node
+  WriteNodeContext ctx = {reader, temp, SERD_SUCCESS};
+  temp->n_bytes        = serd_write_uri(uri, write_to_stack, &ctx);
+  if (ctx.status) {
+    return ctx.status;
+  }
+
+  // Replace the destination with the new expanded node (the regions may
+  // overlap on the stack, hence memmove), then drop everything after it
+  memmove(dest, temp, serd_node_total_size(temp));
+  serd_stack_pop_to(&reader->stack, string_start_offset + dest->n_bytes);
+
+  return SERD_SUCCESS;
+}
+
static SerdStatus
read_IRIREF(SerdReader* reader, SerdNode** dest)
{
@@ -718,6 +776,8 @@ read_IRIREF(SerdReader* reader, SerdNode** dest)
return SERD_ERR_OVERFLOW;
}
+ const size_t string_start_offset = reader->stack.size;
+
if (!fancy_syntax(reader) && read_IRIREF_scheme(reader, *dest)) {
return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected IRI scheme\n");
}
@@ -732,7 +792,9 @@ read_IRIREF(SerdReader* reader, SerdNode** dest)
return r_err(
reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character `%c'\n", c);
case '>':
- return SERD_SUCCESS;
+ return (st || (reader->flags & SERD_READ_RELATIVE))
+ ? st
+ : resolve_IRIREF(reader, *dest, string_start_offset);
case '\\':
if (read_UCHAR(reader, *dest, &code)) {
return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n");
@@ -780,10 +842,11 @@ read_IRIREF(SerdReader* reader, SerdNode** dest)
}
static SerdStatus
-read_PrefixedName(SerdReader* reader,
- SerdNode* dest,
- bool read_prefix,
- bool* ate_dot)
+read_PrefixedName(SerdReader* reader,
+ SerdNode* dest,
+ bool read_prefix,
+ bool* ate_dot,
+ const size_t string_start_offset)
{
SerdStatus st = SERD_SUCCESS;
if (read_prefix && ((st = read_PN_PREFIX(reader, dest)) > SERD_FAILURE)) {
@@ -795,10 +858,32 @@ read_PrefixedName(SerdReader* reader,
}
if ((st = push_byte(reader, dest, eat_byte_safe(reader, ':'))) ||
- (st = read_PN_LOCAL(reader, dest, ate_dot)) > SERD_FAILURE) {
+ (st = read_PN_LOCAL(reader, dest, ate_dot)) > SERD_FAILURE ||
+ (reader->flags & SERD_READ_PREFIXED)) {
return st;
}
+ // Expand to absolute URI
+ SerdStringView prefix;
+ SerdStringView suffix;
+ if ((st = serd_env_expand_in_place(reader->env, dest, &prefix, &suffix))) {
+ return r_err(reader, st, "failed to expand URI\n");
+ }
+
+ // Push a new temporary node for constructing the full URI
+ SerdNode* const temp = push_node(reader, SERD_URI, "", 0);
+ if ((st = push_bytes(reader, temp, (const uint8_t*)prefix.buf, prefix.len)) ||
+ (st = push_bytes(reader, temp, (const uint8_t*)suffix.buf, suffix.len))) {
+ return st;
+ }
+
+ // Replace the destination with the new expanded node
+ const size_t total_size = serd_node_total_size(temp);
+ memmove(dest, temp, total_size);
+
+ serd_stack_pop_to(&reader->stack,
+ string_start_offset + serd_node_length(dest));
+
return SERD_SUCCESS;
}
@@ -889,14 +974,15 @@ read_number(SerdReader* reader, SerdNode** dest, bool* ate_dot)
static SerdStatus
read_iri(SerdReader* reader, SerdNode** dest, bool* ate_dot)
{
- switch (peek_byte(reader)) {
- case '<':
+ if (peek_byte(reader) == '<') {
return read_IRIREF(reader, dest);
- default:
- *dest = push_node(reader, SERD_CURIE, "", 0);
- return *dest ? read_PrefixedName(reader, *dest, true, ate_dot)
- : SERD_ERR_OVERFLOW;
}
+
+ if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) {
+ return SERD_ERR_OVERFLOW;
+ }
+
+ return read_PrefixedName(reader, *dest, true, ate_dot, reader->stack.size);
}
static SerdStatus
@@ -986,10 +1072,11 @@ read_verb(SerdReader* reader, SerdNode** dest)
return SERD_ERR_OVERFLOW;
}
- SerdStatus st = read_PN_PREFIX(reader, *dest);
- bool ate_dot = false;
- SerdNode* node = *dest;
- const int next = peek_byte(reader);
+ const size_t string_start_offset = reader->stack.size;
+ SerdStatus st = read_PN_PREFIX(reader, *dest);
+ bool ate_dot = false;
+ SerdNode* node = *dest;
+ const int next = peek_byte(reader);
if (!st && node->n_bytes == 1 && serd_node_string(node)[0] == 'a' &&
next != ':' && !is_PN_CHARS_BASE((uint32_t)next)) {
serd_stack_pop_to(&reader->stack, orig_stack_size);
@@ -998,7 +1085,8 @@ read_verb(SerdReader* reader, SerdNode** dest)
: SERD_ERR_OVERFLOW);
}
- if (st > SERD_FAILURE || read_PrefixedName(reader, *dest, false, &ate_dot) ||
+ if (st > SERD_FAILURE ||
+ read_PrefixedName(reader, *dest, false, &ate_dot, string_start_offset) ||
ate_dot) {
*dest = NULL;
return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad verb\n");
@@ -1207,7 +1295,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
++orig_cursor.col;
ret = read_literal(reader, &o, ate_dot);
break;
- default:
+ default: {
/* Either a boolean literal, or a qname. Read the prefix first, and if
it is in fact a "true" or "false" literal, produce that instead.
*/
@@ -1215,8 +1303,10 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
return SERD_ERR_OVERFLOW;
}
+ const size_t string_start_offset = reader->stack.size;
while (!read_PN_CHARS_BASE(reader, o)) {
}
+
if ((o->n_bytes == 4 && !memcmp(serd_node_string(o), "true", 4)) ||
(o->n_bytes == 5 && !memcmp(serd_node_string(o), "false", 5))) {
o->flags |= SERD_HAS_DATATYPE;
@@ -1229,12 +1319,14 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
} else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) {
ret = SERD_ERR_BAD_SYNTAX;
} else {
- if ((ret = read_PrefixedName(reader, o, false, ate_dot))) {
+ if ((ret = read_PrefixedName(
+ reader, o, false, ate_dot, string_start_offset))) {
ret = ret > SERD_FAILURE ? ret : SERD_ERR_BAD_SYNTAX;
return r_err(reader, ret, "expected prefixed name\n");
}
}
}
+ }
if (!ret && emit && simple && o) {
serd_node_zero_pad(o);
@@ -1475,6 +1567,7 @@ read_base(SerdReader* reader, bool sparql, bool token)
SerdNode* uri = NULL;
TRY(st, read_IRIREF(reader, &uri));
serd_node_zero_pad(uri);
+ TRY(st, serd_env_set_base_uri(reader->env, serd_node_string_view(uri)));
TRY(st, serd_sink_write_base(reader->sink, uri));
read_ws_star(reader);
@@ -1517,12 +1610,18 @@ read_prefixID(SerdReader* reader, bool sparql, bool token)
serd_node_zero_pad(name);
serd_node_zero_pad(uri);
- st = serd_sink_write_prefix(reader->sink, name, uri);
+
+ TRY(st,
+ serd_env_set_prefix(
+ reader->env, serd_node_string_view(name), serd_node_string_view(uri)));
+
+ TRY(st, serd_sink_write_prefix(reader->sink, name, uri));
if (!sparql) {
read_ws_star(reader);
st = eat_byte_check(reader, '.') ? SERD_SUCCESS : SERD_ERR_BAD_SYNTAX;
}
+
return st;
}
diff --git a/src/node_syntax.c b/src/node_syntax.c
index 88d360dd..fc442f33 100644
--- a/src/node_syntax.c
+++ b/src/node_syntax.c
@@ -55,19 +55,26 @@ serd_node_from_syntax(const char* const str, const SerdSyntax syntax)
SerdNode* object = NULL;
SerdWorld* const world = serd_world_new();
+ SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING());
SerdSink* const sink = serd_sink_new(&object, on_node_string_event, NULL);
SerdByteSource* const source = serd_byte_source_new_string(doc, NULL);
SerdReader* const reader = serd_reader_new(
- world, syntax, SERD_READ_EXACT_BLANKS, sink, 1024 + doc_len);
+ world,
+ syntax,
+ SERD_READ_EXACT_BLANKS | SERD_READ_PREFIXED | SERD_READ_RELATIVE,
+ env,
+ sink,
+ 1024 + doc_len);
serd_world_set_error_func(world, quiet_error_func, NULL);
serd_reader_start(reader, source);
serd_reader_read_document(reader);
serd_reader_finish(reader);
- serd_byte_source_free(source);
serd_reader_free(reader);
+ serd_byte_source_free(source);
serd_sink_free(sink);
+ serd_env_free(env);
serd_world_free(world);
free(doc);
@@ -92,5 +99,10 @@ serd_node_to_syntax(const SerdNode* const node, const SerdSyntax syntax)
result = serd_buffer_sink_finish(&buffer);
}
+ serd_writer_free(writer);
+ serd_byte_sink_free(out);
+ serd_env_free(env);
+ serd_world_free(world);
+
return result;
}
diff --git a/src/reader.c b/src/reader.c
index 4c4bffa5..ba3559bc 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -158,7 +158,8 @@ serd_reader_read_document(SerdReader* reader)
SerdReader*
serd_reader_new(SerdWorld* const world,
const SerdSyntax syntax,
- SerdReaderFlags flags,
+ const SerdReaderFlags flags,
+ SerdEnv* const env,
const SerdSink* const sink,
const size_t stack_size)
{
@@ -170,6 +171,7 @@ serd_reader_new(SerdWorld* const world,
me->world = world;
me->sink = sink;
+ me->env = env;
me->stack = serd_stack_new(stack_size, sizeof(SerdNode));
me->syntax = syntax;
me->flags = flags;
diff --git a/src/reader.h b/src/reader.h
index 9bdbf72f..8fba7504 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -58,6 +58,7 @@ struct SerdReaderImpl {
SerdNode* rdf_rest;
SerdNode* rdf_nil;
SerdByteSource* source;
+ SerdEnv* env;
SerdStack stack;
SerdSyntax syntax;
SerdReaderFlags flags;
@@ -172,10 +173,7 @@ push_byte(SerdReader* reader, SerdNode* node, const int c)
}
static inline SerdStatus
-push_bytes(SerdReader* reader,
- SerdNode* ref,
- const uint8_t* bytes,
- unsigned len)
+push_bytes(SerdReader* reader, SerdNode* ref, const uint8_t* bytes, size_t len)
{
const bool has_space = reader->stack.buf_size >= reader->stack.size + len;
if (has_space) {
diff --git a/src/serdi.c b/src/serdi.c
index 2801a2da..78de1ad3 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -95,6 +95,7 @@ static SerdStatus
read_file(SerdWorld* const world,
SerdSyntax syntax,
const SerdReaderFlags flags,
+ SerdEnv* const env,
const SerdSink* const sink,
const size_t stack_size,
const char* filename,
@@ -122,7 +123,8 @@ read_file(SerdWorld* const world,
return SERD_ERR_UNKNOWN;
}
- SerdReader* reader = serd_reader_new(world, syntax, flags, sink, stack_size);
+ SerdReader* reader =
+ serd_reader_new(world, syntax, flags, env, sink, stack_size);
serd_reader_add_blank_prefix(reader, add_prefix);
@@ -326,6 +328,7 @@ main(int argc, char** argv)
serd_reader_new(world,
input_syntax ? input_syntax : SERD_TRIG,
reader_flags,
+ env,
serd_writer_sink(writer),
stack_size);
@@ -362,6 +365,7 @@ main(int argc, char** argv)
if ((st = read_file(world,
input_syntax,
reader_flags,
+ env,
serd_writer_sink(writer),
stack_size,
inputs[i],
diff --git a/src/string.c b/src/string.c
index b5904c84..4c6a1135 100644
--- a/src/string.c
+++ b/src/string.c
@@ -64,6 +64,8 @@ serd_strerror(SerdStatus status)
return "Error writing to file";
case SERD_ERR_BAD_CALL:
return "Invalid call";
+ case SERD_ERR_BAD_URI:
+ return "Invalid or unresolved URI";
default:
break;
}
diff --git a/src/writer.c b/src/writer.c
index c520638e..f55b47d4 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -129,7 +129,7 @@ struct SerdWriterImpl {
SerdSink iface;
SerdSyntax syntax;
SerdWriterFlags flags;
- SerdEnv* env;
+ const SerdEnv* env;
SerdNode* root_node;
SerdURIView root_uri;
WriteContext* anon_stack;
@@ -1184,7 +1184,7 @@ SerdWriter*
serd_writer_new(SerdWorld* world,
SerdSyntax syntax,
SerdWriterFlags flags,
- SerdEnv* env,
+ const SerdEnv* env,
SerdByteSink* byte_sink)
{
const WriteContext context = WRITE_CONTEXT_NULL;
@@ -1230,22 +1230,18 @@ serd_writer_set_base_uri(SerdWriter* writer, const SerdNode* uri)
return SERD_ERR_BAD_ARG;
}
- SerdStatus st =
- serd_env_set_base_uri(writer->env, serd_node_string_view(uri));
-
- if (!st) {
- if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
- if (ctx(writer, SERD_GRAPH) || ctx(writer, SERD_SUBJECT)) {
- TRY(st, esink(" .\n\n", 4, writer));
- reset_context(writer, true);
- }
- TRY(st, esink("@base <", 7, writer));
- TRY(st, esink(serd_node_string(uri), uri->n_bytes, writer));
- TRY(st, esink("> .\n", 4, writer));
+ SerdStatus st = SERD_SUCCESS;
+ if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
+ if (ctx(writer, SERD_GRAPH) || ctx(writer, SERD_SUBJECT)) {
+ TRY(st, esink(" .\n\n", 4, writer));
+ reset_context(writer, true);
}
- writer->indent = 0;
- reset_context(writer, true);
+ TRY(st, esink("@base <", 7, writer));
+ TRY(st, esink(serd_node_string(uri), uri->n_bytes, writer));
+ TRY(st, esink("> .\n", 4, writer));
}
+ writer->indent = 0;
+ reset_context(writer, true);
return st;
}
@@ -1273,27 +1269,23 @@ serd_writer_set_prefix(SerdWriter* writer,
return SERD_ERR_BAD_ARG;
}
- SerdStatus st = serd_env_set_prefix(
- writer->env, serd_node_string_view(name), serd_node_string_view(uri));
-
- if (!st) {
- if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
- if (ctx(writer, SERD_GRAPH) || ctx(writer, SERD_SUBJECT)) {
- TRY(st, esink(" .\n\n", 4, writer));
- reset_context(writer, true);
- }
-
- TRY(st, esink("@prefix ", 8, writer));
- TRY(st, esink(serd_node_string(name), name->n_bytes, writer));
- TRY(st, esink(": <", 3, writer));
- TRY(st, write_uri_from_node(writer, uri));
- TRY(st, esink("> .\n", 4, writer));
+ SerdStatus st = SERD_SUCCESS;
+ if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
+ if (ctx(writer, SERD_GRAPH) || ctx(writer, SERD_SUBJECT)) {
+ TRY(st, esink(" .\n\n", 4, writer));
+ reset_context(writer, true);
}
- writer->indent = 0;
- reset_context(writer, true);
+ TRY(st, esink("@prefix ", 8, writer));
+ TRY(st, esink(serd_node_string(name), name->n_bytes, writer));
+ TRY(st, esink(": <", 3, writer));
+ TRY(st, write_uri_from_node(writer, uri));
+ TRY(st, esink("> .\n", 4, writer));
}
+ writer->indent = 0;
+ reset_context(writer, true);
+
return st;
}
@@ -1317,8 +1309,8 @@ serd_writer_sink(SerdWriter* writer)
return &writer->iface;
}
-SerdEnv*
-serd_writer_env(SerdWriter* writer)
+const SerdEnv*
+serd_writer_env(const SerdWriter* writer)
{
return writer->env;
}