diff options
author | David Robillard <d@drobilla.net> | 2021-03-01 19:10:36 -0500 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2021-03-08 23:23:06 -0500 |
commit | c7a5af0235706bc0624a236fae03fc728144234f (patch) | |
tree | ea682746f8d955f9e8ae3f549848be54f91cd6fd /src | |
parent | 8a13a270d9150e0cd14a049c76c601d09ee539bf (diff) | |
download | serd-c7a5af0235706bc0624a236fae03fc728144234f.tar.gz serd-c7a5af0235706bc0624a236fae03fc728144234f.tar.bz2 serd-c7a5af0235706bc0624a236fae03fc728144234f.zip |
Expand URIs in reader
Diffstat (limited to 'src')
-rw-r--r-- | src/n3.c | 139 | ||||
-rw-r--r-- | src/node_syntax.c | 16 | ||||
-rw-r--r-- | src/reader.c | 4 | ||||
-rw-r--r-- | src/reader.h | 6 | ||||
-rw-r--r-- | src/serdi.c | 6 | ||||
-rw-r--r-- | src/string.c | 2 | ||||
-rw-r--r-- | src/writer.c | 62 |
7 files changed, 172 insertions, 63 deletions
@@ -15,6 +15,7 @@ */ #include "byte_source.h" +#include "env.h" #include "namespaces.h" #include "node.h" #include "reader.h" @@ -707,6 +708,63 @@ read_IRIREF_scheme(SerdReader* reader, SerdNode* dest) return SERD_FAILURE; } +typedef struct { + SerdReader* reader; + SerdNode* node; + SerdStatus status; +} WriteNodeContext; + +static size_t +write_to_stack(const void* const SERD_NONNULL buf, + const size_t size, + const size_t nmemb, + void* const SERD_NONNULL stream) +{ + WriteNodeContext* const ctx = (WriteNodeContext*)stream; + const uint8_t* const utf8 = (const uint8_t*)buf; + + ctx->status = push_bytes(ctx->reader, ctx->node, utf8, nmemb * size); + + return nmemb; +} + +static SerdStatus +resolve_IRIREF(SerdReader* const reader, + SerdNode* const dest, + const size_t string_start_offset) +{ + // If the URI is already absolute, we don't need to do anything + SerdURIView uri = serd_parse_uri(serd_node_string(dest)); + if (uri.scheme.len) { + return SERD_SUCCESS; + } + + // Resolve relative URI reference to a full URI + uri = serd_resolve_uri(uri, serd_env_base_uri_view(reader->env)); + if (!uri.scheme.len) { + return SERD_ERR_BAD_URI; + } + + // Push a new temporary node for constructing the resolved URI + SerdNode* const temp = push_node(reader, SERD_URI, "", 0); + if (!temp) { + return SERD_ERR_OVERFLOW; + } + + // Write resolved URI to the temporary node + WriteNodeContext ctx = {reader, temp, SERD_SUCCESS}; + temp->n_bytes = serd_write_uri(uri, write_to_stack, &ctx); + if (ctx.status) { + return ctx.status; + } + + // Replace the destination with the new expanded node + memmove(dest, temp, serd_node_total_size(temp)); + serd_stack_pop_to(&reader->stack, string_start_offset + dest->n_bytes); + + return SERD_SUCCESS; +} + static SerdStatus read_IRIREF(SerdReader* reader, SerdNode** dest) { @@ -718,6 +776,8 @@ read_IRIREF(SerdReader* reader, SerdNode** dest) return SERD_ERR_OVERFLOW; } + const size_t string_start_offset = reader->stack.size; + if (!fancy_syntax(reader) && read_IRIREF_scheme(reader, *dest)) { return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected IRI scheme\n"); } @@ -732,7 +792,9 @@ read_IRIREF(SerdReader* reader, SerdNode** dest) return r_err( reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character `%c'\n", c); case '>': - return SERD_SUCCESS; + return (st || (reader->flags & SERD_READ_RELATIVE)) + ? st + : resolve_IRIREF(reader, *dest, string_start_offset); case '\\': if (read_UCHAR(reader, *dest, &code)) { return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n"); @@ -780,10 +842,11 @@ read_IRIREF(SerdReader* reader, SerdNode** dest) } static SerdStatus -read_PrefixedName(SerdReader* reader, - SerdNode* dest, - bool read_prefix, - bool* ate_dot) +read_PrefixedName(SerdReader* reader, + SerdNode* dest, + bool read_prefix, + bool* ate_dot, + const size_t string_start_offset) { SerdStatus st = SERD_SUCCESS; if (read_prefix && ((st = read_PN_PREFIX(reader, dest)) > SERD_FAILURE)) { @@ -795,10 +858,32 @@ read_PrefixedName(SerdReader* reader, } if ((st = push_byte(reader, dest, eat_byte_safe(reader, ':'))) || - (st = read_PN_LOCAL(reader, dest, ate_dot)) > SERD_FAILURE) { + (st = read_PN_LOCAL(reader, dest, ate_dot)) > SERD_FAILURE || + (reader->flags & SERD_READ_PREFIXED)) { return st; } + // Expand to absolute URI + SerdStringView prefix; + SerdStringView suffix; + if ((st = serd_env_expand_in_place(reader->env, dest, &prefix, &suffix))) { + return r_err(reader, st, "failed to expand URI\n"); + } + + // Push a new temporary node for constructing the full URI + SerdNode* const temp = push_node(reader, SERD_URI, "", 0); + if ((st = push_bytes(reader, temp, (const uint8_t*)prefix.buf, prefix.len)) || + (st = push_bytes(reader, temp, (const uint8_t*)suffix.buf, suffix.len))) { + return st; + } + + // Replace the destination with the new expanded node + const size_t total_size = serd_node_total_size(temp); + memmove(dest, temp, total_size); + + serd_stack_pop_to(&reader->stack, + string_start_offset + serd_node_length(dest)); + return SERD_SUCCESS; } @@ -889,14 +974,15 @@ read_number(SerdReader* reader, SerdNode** dest, bool* ate_dot) static SerdStatus read_iri(SerdReader* reader, SerdNode** dest, bool* ate_dot) { - switch (peek_byte(reader)) { - case '<': + if (peek_byte(reader) == '<') { return read_IRIREF(reader, dest); - default: - *dest = push_node(reader, SERD_CURIE, "", 0); - return *dest ? read_PrefixedName(reader, *dest, true, ate_dot) - : SERD_ERR_OVERFLOW; } + + if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) { + return SERD_ERR_OVERFLOW; + } + + return read_PrefixedName(reader, *dest, true, ate_dot, reader->stack.size); } static SerdStatus @@ -986,10 +1072,11 @@ read_verb(SerdReader* reader, SerdNode** dest) return SERD_ERR_OVERFLOW; } - SerdStatus st = read_PN_PREFIX(reader, *dest); - bool ate_dot = false; - SerdNode* node = *dest; - const int next = peek_byte(reader); + const size_t string_start_offset = reader->stack.size; + SerdStatus st = read_PN_PREFIX(reader, *dest); + bool ate_dot = false; + SerdNode* node = *dest; + const int next = peek_byte(reader); if (!st && node->n_bytes == 1 && serd_node_string(node)[0] == 'a' && next != ':' && !is_PN_CHARS_BASE((uint32_t)next)) { serd_stack_pop_to(&reader->stack, orig_stack_size); @@ -998,7 +1085,8 @@ read_verb(SerdReader* reader, SerdNode** dest) : SERD_ERR_OVERFLOW); } - if (st > SERD_FAILURE || read_PrefixedName(reader, *dest, false, &ate_dot) || + if (st > SERD_FAILURE || + read_PrefixedName(reader, *dest, false, &ate_dot, string_start_offset) || ate_dot) { *dest = NULL; return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad verb\n"); @@ -1207,7 +1295,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) ++orig_cursor.col; ret = read_literal(reader, &o, ate_dot); break; - default: + default: { /* Either a boolean literal, or a qname. Read the prefix first, and if it is in fact a "true" or "false" literal, produce that instead. */ @@ -1215,8 +1303,10 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) return SERD_ERR_OVERFLOW; } + const size_t string_start_offset = reader->stack.size; while (!read_PN_CHARS_BASE(reader, o)) { } + if ((o->n_bytes == 4 && !memcmp(serd_node_string(o), "true", 4)) || (o->n_bytes == 5 && !memcmp(serd_node_string(o), "false", 5))) { o->flags |= SERD_HAS_DATATYPE; @@ -1229,12 +1319,14 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) } else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) { ret = SERD_ERR_BAD_SYNTAX; } else { - if ((ret = read_PrefixedName(reader, o, false, ate_dot))) { + if ((ret = read_PrefixedName( + reader, o, false, ate_dot, string_start_offset))) { ret = ret > SERD_FAILURE ? ret : SERD_ERR_BAD_SYNTAX; return r_err(reader, ret, "expected prefixed name\n"); } } } + } if (!ret && emit && simple && o) { serd_node_zero_pad(o); @@ -1475,6 +1567,7 @@ read_base(SerdReader* reader, bool sparql, bool token) SerdNode* uri = NULL; TRY(st, read_IRIREF(reader, &uri)); serd_node_zero_pad(uri); + TRY(st, serd_env_set_base_uri(reader->env, serd_node_string_view(uri))); TRY(st, serd_sink_write_base(reader->sink, uri)); read_ws_star(reader); @@ -1517,12 +1610,18 @@ read_prefixID(SerdReader* reader, bool sparql, bool token) serd_node_zero_pad(name); serd_node_zero_pad(uri); - st = serd_sink_write_prefix(reader->sink, name, uri); + + TRY(st, + serd_env_set_prefix( + reader->env, serd_node_string_view(name), serd_node_string_view(uri))); + + TRY(st, serd_sink_write_prefix(reader->sink, name, uri)); if (!sparql) { read_ws_star(reader); st = eat_byte_check(reader, '.') ? SERD_SUCCESS : SERD_ERR_BAD_SYNTAX; } + return st; } diff --git a/src/node_syntax.c b/src/node_syntax.c index 88d360dd..fc442f33 100644 --- a/src/node_syntax.c +++ b/src/node_syntax.c @@ -55,19 +55,26 @@ serd_node_from_syntax(const char* const str, const SerdSyntax syntax) SerdNode* object = NULL; SerdWorld* const world = serd_world_new(); + SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING()); SerdSink* const sink = serd_sink_new(&object, on_node_string_event, NULL); SerdByteSource* const source = serd_byte_source_new_string(doc, NULL); SerdReader* const reader = serd_reader_new( - world, syntax, SERD_READ_EXACT_BLANKS, sink, 1024 + doc_len); + world, + syntax, + SERD_READ_EXACT_BLANKS | SERD_READ_PREFIXED | SERD_READ_RELATIVE, + env, + sink, + 1024 + doc_len); serd_world_set_error_func(world, quiet_error_func, NULL); serd_reader_start(reader, source); serd_reader_read_document(reader); serd_reader_finish(reader); - serd_byte_source_free(source); serd_reader_free(reader); + serd_byte_source_free(source); serd_sink_free(sink); + serd_env_free(env); serd_world_free(world); free(doc); @@ -92,5 +99,10 @@ serd_node_to_syntax(const SerdNode* const node, const SerdSyntax syntax) result = serd_buffer_sink_finish(&buffer); } + serd_writer_free(writer); + serd_byte_sink_free(out); + serd_env_free(env); + serd_world_free(world); + return result; } diff --git a/src/reader.c b/src/reader.c index 4c4bffa5..ba3559bc 100644 --- a/src/reader.c +++ b/src/reader.c @@ -158,7 +158,8 @@ serd_reader_read_document(SerdReader* reader) SerdReader* serd_reader_new(SerdWorld* const world, const SerdSyntax syntax, - SerdReaderFlags flags, + const SerdReaderFlags flags, + SerdEnv* const env, const SerdSink* const sink, const size_t stack_size) { @@ -170,6 +171,7 @@ serd_reader_new(SerdWorld* const world, me->world = world; me->sink = sink; + me->env = env; me->stack = serd_stack_new(stack_size, sizeof(SerdNode)); me->syntax = syntax; me->flags = flags; diff --git a/src/reader.h b/src/reader.h index 9bdbf72f..8fba7504 100644 --- a/src/reader.h +++ b/src/reader.h @@ -58,6 +58,7 @@ struct SerdReaderImpl { SerdNode* rdf_rest; SerdNode* rdf_nil; SerdByteSource* source; + SerdEnv* env; SerdStack stack; SerdSyntax syntax; SerdReaderFlags flags; @@ -172,10 +173,7 @@ push_byte(SerdReader* reader, SerdNode* node, const int c) } static inline SerdStatus -push_bytes(SerdReader* reader, - SerdNode* ref, - const uint8_t* bytes, - unsigned len) +push_bytes(SerdReader* reader, SerdNode* ref, const uint8_t* bytes, size_t len) { const bool has_space = reader->stack.buf_size >= reader->stack.size + len; if (has_space) { diff --git a/src/serdi.c b/src/serdi.c index 2801a2da..78de1ad3 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -95,6 +95,7 @@ static SerdStatus read_file(SerdWorld* const world, SerdSyntax syntax, const SerdReaderFlags flags, + SerdEnv* const env, const SerdSink* const sink, const size_t stack_size, const char* filename, @@ -122,7 +123,8 @@ read_file(SerdWorld* const world, return SERD_ERR_UNKNOWN; } - SerdReader* reader = serd_reader_new(world, syntax, flags, sink, stack_size); + SerdReader* reader = + serd_reader_new(world, syntax, flags, env, sink, stack_size); serd_reader_add_blank_prefix(reader, add_prefix); @@ -326,6 +328,7 @@ main(int argc, char** argv) serd_reader_new(world, input_syntax ? input_syntax : SERD_TRIG, reader_flags, + env, serd_writer_sink(writer), stack_size); @@ -362,6 +365,7 @@ main(int argc, char** argv) if ((st = read_file(world, input_syntax, reader_flags, + env, serd_writer_sink(writer), stack_size, inputs[i], diff --git a/src/string.c b/src/string.c index b5904c84..4c6a1135 100644 --- a/src/string.c +++ b/src/string.c @@ -64,6 +64,8 @@ serd_strerror(SerdStatus status) return "Error writing to file"; case SERD_ERR_BAD_CALL: return "Invalid call"; + case SERD_ERR_BAD_URI: + return "Invalid or unresolved URI"; default: break; } diff --git a/src/writer.c b/src/writer.c index c520638e..f55b47d4 100644 --- a/src/writer.c +++ b/src/writer.c @@ -129,7 +129,7 @@ struct SerdWriterImpl { SerdSink iface; SerdSyntax syntax; SerdWriterFlags flags; - SerdEnv* env; + const SerdEnv* env; SerdNode* root_node; SerdURIView root_uri; WriteContext* anon_stack; @@ -1184,7 +1184,7 @@ SerdWriter* serd_writer_new(SerdWorld* world, SerdSyntax syntax, SerdWriterFlags flags, - SerdEnv* env, + const SerdEnv* env, SerdByteSink* byte_sink) { const WriteContext context = WRITE_CONTEXT_NULL; @@ -1230,22 +1230,18 @@ serd_writer_set_base_uri(SerdWriter* writer, const SerdNode* uri) return SERD_ERR_BAD_ARG; } - SerdStatus st = - serd_env_set_base_uri(writer->env, serd_node_string_view(uri)); - - if (!st) { - if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { - if (ctx(writer, SERD_GRAPH) || ctx(writer, SERD_SUBJECT)) { - TRY(st, esink(" .\n\n", 4, writer)); - reset_context(writer, true); - } - TRY(st, esink("@base <", 7, writer)); - TRY(st, esink(serd_node_string(uri), uri->n_bytes, writer)); - TRY(st, esink("> .\n", 4, writer)); + SerdStatus st = SERD_SUCCESS; + if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { + if (ctx(writer, SERD_GRAPH) || ctx(writer, SERD_SUBJECT)) { + TRY(st, esink(" .\n\n", 4, writer)); + reset_context(writer, true); } - writer->indent = 0; - reset_context(writer, true); + TRY(st, esink("@base <", 7, writer)); + TRY(st, esink(serd_node_string(uri), uri->n_bytes, writer)); + TRY(st, esink("> .\n", 4, writer)); } + writer->indent = 0; + reset_context(writer, true); return st; } @@ -1273,27 +1269,23 @@ serd_writer_set_prefix(SerdWriter* writer, return SERD_ERR_BAD_ARG; } - SerdStatus st = serd_env_set_prefix( - writer->env, serd_node_string_view(name), serd_node_string_view(uri)); - - if (!st) { - if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { - if (ctx(writer, SERD_GRAPH) || ctx(writer, SERD_SUBJECT)) { - TRY(st, esink(" .\n\n", 4, writer)); - reset_context(writer, true); - } - - TRY(st, esink("@prefix ", 8, writer)); - TRY(st, esink(serd_node_string(name), name->n_bytes, writer)); - TRY(st, esink(": <", 3, writer)); - TRY(st, write_uri_from_node(writer, uri)); - TRY(st, esink("> .\n", 4, writer)); + SerdStatus st = SERD_SUCCESS; + if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { + if (ctx(writer, SERD_GRAPH) || ctx(writer, SERD_SUBJECT)) { + TRY(st, esink(" .\n\n", 4, writer)); + reset_context(writer, true); } - writer->indent = 0; - reset_context(writer, true); + TRY(st, esink("@prefix ", 8, writer)); + TRY(st, esink(serd_node_string(name), name->n_bytes, writer)); + TRY(st, esink(": <", 3, writer)); + TRY(st, write_uri_from_node(writer, uri)); + TRY(st, esink("> .\n", 4, writer)); } + writer->indent = 0; + reset_context(writer, true); + return st; } @@ -1317,8 +1309,8 @@ serd_writer_sink(SerdWriter* writer) return &writer->iface; } -SerdEnv* -serd_writer_env(SerdWriter* writer) +const SerdEnv* +serd_writer_env(const SerdWriter* writer) { return writer->env; } |