From 8c67f9eba47d30913749e607c440b170a5cbd804 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Thu, 15 Apr 2021 17:52:44 -0400 Subject: [WIP] Expand URIs in reader This expands relative and prefixed URIs in the reader on the stack, rather than passing them to the caller to be dealt with. This pushes these context-dependent forms to the edge of the system as much as possible, to minimise the headaches they can cause. This is a step towards having stricter guarantees about nodes and eliminating the CURIE node type altogether. --- src/env.c | 15 +++++++ src/env.h | 8 ++++ src/node.c | 2 +- src/node.h | 3 ++ src/read_turtle.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++------ src/reader.h | 24 ++++++----- 6 files changed, 147 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/env.c b/src/env.c index afbf1064..13d8129e 100644 --- a/src/env.c +++ b/src/env.c @@ -139,6 +139,21 @@ serd_env_set_base_uri(SerdEnv* const env, const SerdStringView uri) return SERD_SUCCESS; } +SerdStringView +serd_env_find_prefix(const SerdEnv* const env, const SerdStringView name) +{ + for (size_t i = 0; i < env->n_prefixes; ++i) { + const SerdNode* const prefix_name = env->prefixes[i].name; + if (prefix_name->length == name.length) { + if (!memcmp(serd_node_string(prefix_name), name.data, name.length)) { + return serd_node_string_view(env->prefixes[i].uri); + } + } + } + + return serd_empty_string(); +} + ZIX_PURE_FUNC static SerdPrefix* serd_env_find(const SerdEnv* const env, const char* const name, diff --git a/src/env.h b/src/env.h index 21ce169a..a4f0e1ed 100644 --- a/src/env.h +++ b/src/env.h @@ -20,6 +20,14 @@ serd_env_qualify_in_place(const SerdEnv* env, const SerdNode** prefix, SerdStringView* suffix); +/** + Return the URI for the prefix with the given name. + + If no such prefix is known, returns an empty string view. +*/ +ZIX_PURE_FUNC SerdStringView +serd_env_find_prefix(const SerdEnv* env, SerdStringView name); + /** Expand `curie`. 
diff --git a/src/node.c b/src/node.c index f1fbf0e6..e0df7a86 100644 --- a/src/node.c +++ b/src/node.c @@ -104,7 +104,7 @@ serd_node_check_padding(const SerdNode* node) #endif } -static ZIX_PURE_FUNC size_t +size_t serd_node_total_size(const SerdNode* const node) { return node ? (sizeof(SerdNode) + serd_node_pad_length(node->length) + diff --git a/src/node.h b/src/node.h index ed5a0c8f..93590bfa 100644 --- a/src/node.h +++ b/src/node.h @@ -54,6 +54,9 @@ void serd_node_set(SerdNode* ZIX_NONNULL* ZIX_NONNULL dst, const SerdNode* ZIX_NONNULL src); +ZIX_PURE_FUNC size_t +serd_node_total_size(const SerdNode* ZIX_NULLABLE node); + void serd_node_zero_pad(SerdNode* ZIX_NONNULL node); diff --git a/src/read_turtle.c b/src/read_turtle.c index 8d9ec78a..fa7b9731 100644 --- a/src/read_turtle.c +++ b/src/read_turtle.c @@ -3,6 +3,7 @@ #include "read_turtle.h" #include "byte_source.h" +#include "env.h" #include "namespaces.h" #include "node.h" #include "ntriples.h" @@ -21,6 +22,8 @@ #include "serd/statement.h" #include "serd/status.h" #include "serd/string_view.h" +#include "serd/uri.h" +#include "zix/attributes.h" #include #include @@ -292,6 +295,66 @@ read_PN_PREFIX(SerdReader* const reader, SerdNode* const dest) return st ? 
st : read_PN_PREFIX_tail(reader, dest); } +typedef struct { + SerdReader* reader; + SerdNode* node; + SerdStatus status; +} WriteNodeContext; + +static size_t +write_to_stack(const void* const ZIX_NONNULL buf, + const size_t size, + const size_t nmemb, + void* const ZIX_NONNULL stream) +{ + WriteNodeContext* const ctx = (WriteNodeContext*)stream; + const uint8_t* const utf8 = (const uint8_t*)buf; + + ctx->status = push_bytes(ctx->reader, ctx->node, utf8, nmemb * size); + + return nmemb; +} + +static SerdStatus +resolve_IRIREF(SerdReader* const reader, + SerdNode* const dest, + const size_t string_start_offset) +{ + // If the URI is already absolute, we don't need to do anything + if (serd_uri_string_has_scheme(serd_node_string(dest))) { + return SERD_SUCCESS; + } + + // Parse the URI reference so we can resolve it + SerdURIView uri = serd_parse_uri(serd_node_string(dest)); + + // Resolve relative URI reference to a full URI + uri = serd_resolve_uri(uri, serd_env_base_uri_view(reader->env)); + if (!uri.scheme.length) { + return r_err(reader, + SERD_BAD_SYNTAX, + "failed to resolve relative URI reference <%s>", + serd_node_string(dest)); + } + + // Push a new temporary node for constructing the resolved URI + SerdNode* const temp = push_node(reader, SERD_URI, "", 0); + if (!temp) { + return SERD_BAD_STACK; + } + + // Write resolved URI to the temporary node + WriteNodeContext ctx = {reader, temp, SERD_SUCCESS}; + temp->length = serd_write_uri(uri, write_to_stack, &ctx); + if (!ctx.status) { + // Replace the destination with the new expanded node + memmove(dest, temp, serd_node_total_size(temp)); + serd_stack_pop_to(&reader->stack, string_start_offset + dest->length); + } + + return ctx.status; +} + static SerdStatus read_IRIREF(SerdReader* const reader, SerdNode** const dest) { @@ -302,14 +365,24 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest) return SERD_BAD_STACK; } - return read_IRIREF_suffix(reader, *dest); + const size_t string_start_offset = 
reader->stack.size; + + st = read_IRIREF_suffix(reader, *dest); + if (!tolerate_status(reader, st)) { + return st; + } + + return (reader->flags & SERD_READ_RELATIVE) + ? SERD_SUCCESS + : resolve_IRIREF(reader, *dest, string_start_offset); } static SerdStatus read_PrefixedName(SerdReader* const reader, SerdNode* const dest, const bool read_prefix, - bool* const ate_dot) + bool* const ate_dot, + const size_t string_start_offset) { SerdStatus st = SERD_SUCCESS; if (read_prefix) { @@ -320,8 +393,24 @@ read_PrefixedName(SerdReader* const reader, return SERD_FAILURE; } - TRY(st, push_byte(reader, dest, eat_byte_safe(reader, ':'))); - TRY_FAILING(st, read_PN_LOCAL(reader, dest, ate_dot)); + skip_byte(reader, ':'); + + // Search environment for the prefix URI + const SerdStringView prefix = serd_node_string_view(dest); + const SerdStringView prefix_uri = serd_env_find_prefix(reader->env, prefix); + if (!prefix_uri.length) { + return r_err(reader, st, "unknown prefix \"%s\"", prefix.data); + } + + // Pop back to the start of the string + serd_stack_pop_to(&reader->stack, string_start_offset); + dest->length = 0U; + dest->type = SERD_URI; + push_bytes(reader, dest, (const uint8_t*)prefix_uri.data, prefix_uri.length); + if ((st = read_PN_LOCAL(reader, dest, ate_dot)) > SERD_FAILURE) { + return st; + } + return SERD_SUCCESS; } @@ -420,14 +509,15 @@ read_turtle_iri(SerdReader* const reader, SerdNode** const dest, bool* const ate_dot) { - switch (peek_byte(reader)) { - case '<': + if (peek_byte(reader) == '<') { return read_IRIREF(reader, dest); - default: - *dest = push_node(reader, SERD_CURIE, "", 0); - return *dest ? 
read_PrefixedName(reader, *dest, true, ate_dot) - : SERD_BAD_STACK; } + + if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) { + return SERD_BAD_STACK; + } + + return read_PrefixedName(reader, *dest, true, ate_dot, reader->stack.size); } static SerdStatus @@ -481,7 +571,8 @@ read_verb(SerdReader* reader, SerdNode** const dest) return SERD_BAD_STACK; } - SerdStatus st = SERD_SUCCESS; + const size_t string_start_offset = reader->stack.size; + SerdStatus st = SERD_SUCCESS; TRY_LAX(st, read_PN_PREFIX(reader, *dest)); bool ate_dot = false; @@ -495,7 +586,9 @@ read_verb(SerdReader* reader, SerdNode** const dest) : SERD_BAD_STACK); } - if ((st = read_PrefixedName(reader, *dest, false, &ate_dot)) || ate_dot) { + if ((st = read_PrefixedName( + reader, *dest, false, &ate_dot, string_start_offset)) || + ate_dot) { *dest = NULL; return r_err( reader, st > SERD_FAILURE ? st : SERD_BAD_SYNTAX, "expected verb"); @@ -586,7 +679,7 @@ read_named_object(SerdReader* const reader, SerdStatus st = SERD_SUCCESS; // Attempt to read a prefixed name - st = read_PrefixedName(reader, node, true, ate_dot); + st = read_PrefixedName(reader, node, true, ate_dot, reader->stack.size); // Check if this is actually a special boolean node if (st == SERD_FAILURE && (node_has_string(node, true_string) || diff --git a/src/reader.h b/src/reader.h index 302f8c6f..a98d5ef8 100644 --- a/src/reader.h +++ b/src/reader.h @@ -178,19 +178,23 @@ push_byte(SerdReader* reader, SerdNode* node, const int c) } static inline SerdStatus -push_bytes(SerdReader* reader, - SerdNode* ref, - const uint8_t* bytes, - unsigned len) +push_bytes(SerdReader* const reader, + SerdNode* const node, + const uint8_t* const bytes, + const size_t len) { - const bool has_space = reader->stack.buf_size >= reader->stack.size + len; - if (has_space) { - for (unsigned i = 0; i < len; ++i) { - push_byte(reader, ref, bytes[i]); - } + if (reader->stack.buf_size < reader->stack.size + len) { + return SERD_BAD_STACK; + } + + const size_t begin = 
reader->stack.size - 1U; + for (unsigned i = 0U; i < len; ++i) { + reader->stack.buf[begin + i] = (char)bytes[i]; } - return has_space ? SERD_SUCCESS : SERD_BAD_STACK; + reader->stack.size += len; + node->length += len; + return SERD_SUCCESS; } #endif // SERD_SRC_READER_H -- cgit v1.2.1