about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- include/serd/serd.h 3
-rw-r--r-- src/n3.c 123
-rw-r--r-- src/reader.h 5
-rw-r--r-- src/string.c 2
-rw-r--r-- test/test_overflow.c 44
-rw-r--r-- test/test_string.c 2
-rw-r--r-- test/test_writer.c 36
7 files changed, 191 insertions, 24 deletions
diff --git a/include/serd/serd.h b/include/serd/serd.h
index 0db6205f..7be9395c 100644
--- a/include/serd/serd.h
+++ b/include/serd/serd.h
@@ -215,6 +215,7 @@ typedef enum {
SERD_ERR_BAD_WRITE, ///< Error writing to file/stream
SERD_ERR_NO_DATA, ///< Unexpected end of input
SERD_ERR_BAD_CALL, ///< Invalid call
+ SERD_ERR_BAD_URI, ///< Invalid or unresolved URI
} SerdStatus;
/**
@@ -1739,6 +1740,8 @@ typedef enum {
SERD_READ_LAX = 1u << 0u, ///< Tolerate invalid input where possible
SERD_READ_VARIABLES = 1u << 1u, ///< Support variable nodes
SERD_READ_EXACT_BLANKS = 1u << 2u, ///< Allow clashes with generated blanks
+ SERD_READ_PREFIXED = 1u << 3u, ///< Do not expand prefixed names
+ SERD_READ_RELATIVE = 1u << 4u, ///< Do not expand relative URI references
} SerdReaderFlag;
/// Bitwise OR of SerdReaderFlag values
diff --git a/src/n3.c b/src/n3.c
index 145a0400..72582fa7 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -15,6 +15,7 @@
*/
#include "byte_source.h"
+#include "env.h"
#include "namespaces.h"
#include "node.h"
#include "reader.h"
@@ -732,6 +733,64 @@ read_IRIREF_scheme(SerdReader* const reader, SerdNode* const dest)
return SERD_FAILURE;
}
+typedef struct { // State handed to write_to_stack() through its opaque `stream` argument
+ SerdReader* reader; ///< Reader passed to push_bytes() when appending
+ SerdNode* node; ///< Node passed to push_bytes() as the append target
+ SerdStatus status; ///< Status recorded from push_bytes() by write_to_stack()
+} WriteNodeContext;
+
+/**
+   Write callback that appends a chunk of bytes to a node on the reader stack.
+
+   The first failure reported by push_bytes() is kept in the context: once the
+   status is non-zero, later chunks are dropped rather than appended, so the
+   caller never sees SERD_SUCCESS for a node that is missing bytes.
+
+   @param buf Bytes to append.
+   @param size Size of one element in bytes.
+   @param nmemb Number of elements in `buf`.
+   @param stream Pointer to a WriteNodeContext.
+   @return Always `nmemb`; failure is reported through the context status.
+*/
+static size_t
+write_to_stack(const void* const SERD_NONNULL buf,
+               const size_t               size,
+               const size_t               nmemb,
+               void* const SERD_NONNULL   stream)
+{
+  WriteNodeContext* const ctx  = (WriteNodeContext*)stream;
+  const uint8_t* const    utf8 = (const uint8_t*)buf;
+
+  // If this callback is invoked more than once for one URI, a later chunk
+  // that happens to fit must not reset an earlier error back to success
+  if (!ctx->status) {
+    ctx->status = push_bytes(ctx->reader, ctx->node, utf8, nmemb * size);
+  }
+
+  return nmemb;
+}
+
+static SerdStatus
+resolve_IRIREF(SerdReader* const reader, ///< Reader providing the environment and node stack
+ SerdNode* const dest, ///< Node holding the URI reference as read; overwritten on success
+ const size_t string_start_offset) ///< Stack offset used to compute where to pop back to
+{ // Replace a relative URI reference in dest with its resolved form
+ // If the URI already has a scheme it is absolute, so dest is left untouched
+ SerdURIView uri = serd_parse_uri(serd_node_string(dest));
+ if (uri.scheme.len) {
+ return SERD_SUCCESS;
+ }
+
+ // Resolve the relative reference against the base URI of the reader's environment
+ uri = serd_resolve_uri(uri, serd_env_base_uri_view(reader->env));
+ if (!uri.scheme.len) { // still no scheme after resolution: report an error
+ return r_err(reader,
+ SERD_ERR_BAD_SYNTAX,
+ "failed to resolve relative URI reference <%s>",
+ serd_node_string(dest));
+ }
+
+ // Push a new, empty temporary URI node for constructing the resolved URI
+ SerdNode* const temp = push_node(reader, SERD_URI, "", 0);
+ if (!temp) {
+ return SERD_ERR_OVERFLOW;
+ }
+
+ // Write the resolved URI into the temporary node through write_to_stack()
+ WriteNodeContext ctx = {reader, temp, SERD_SUCCESS};
+ temp->length = serd_write_uri(uri, write_to_stack, &ctx);
+ if (!ctx.status) { // only replace dest when write_to_stack() recorded no error
+ // Replace the destination with the new expanded node (memmove, presumably because the regions may overlap)
+ memmove(dest, temp, serd_node_total_size(temp));
+ serd_stack_pop_to(&reader->stack, string_start_offset + dest->length); // presumably drops the leftover temporary node; TODO confirm
+ }
+
+ return ctx.status;
+}
+
static SerdStatus
read_IRIREF(SerdReader* const reader, SerdNode** const dest)
{
@@ -744,6 +803,8 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest)
return SERD_ERR_OVERFLOW;
}
+ const size_t string_start_offset = reader->stack.size;
+
if (!fancy_syntax(reader) && (st = read_IRIREF_scheme(reader, *dest))) {
return r_err(reader, st, "expected IRI scheme");
}
@@ -757,7 +818,9 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest)
return r_err(
reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character `%c'", c);
case '>':
- return SERD_SUCCESS;
+ return (reader->flags & SERD_READ_RELATIVE)
+ ? SERD_SUCCESS
+ : resolve_IRIREF(reader, *dest, string_start_offset);
case '\\':
if (read_UCHAR(reader, *dest, &code)) {
return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape");
@@ -810,7 +873,8 @@ static SerdStatus
read_PrefixedName(SerdReader* const reader,
SerdNode* const dest,
const bool read_prefix,
- bool* const ate_dot)
+ bool* const ate_dot,
+ const size_t string_start_offset)
{
SerdStatus st = SERD_SUCCESS;
if (read_prefix && ((st = read_PN_PREFIX(reader, dest)) > SERD_FAILURE)) {
@@ -822,10 +886,35 @@ read_PrefixedName(SerdReader* const reader,
}
if ((st = push_byte(reader, dest, eat_byte_safe(reader, ':'))) ||
- (st = read_PN_LOCAL(reader, dest, ate_dot)) > SERD_FAILURE) {
+ (st = read_PN_LOCAL(reader, dest, ate_dot)) > SERD_FAILURE ||
+ (reader->flags & SERD_READ_PREFIXED)) {
return st;
}
+ // Expand to absolute URI
+ const SerdStringView curie = serd_node_string_view(dest);
+ SerdStringView prefix;
+ SerdStringView suffix;
+ if ((st = serd_env_expand_in_place(reader->env, curie, &prefix, &suffix))) {
+ return r_err(
+ reader, st, "failed to expand URI \"%s\"", serd_node_string(dest));
+ }
+
+ // Push a new temporary node for constructing the full URI
+ SerdNode* const temp = push_node(reader, SERD_URI, "", 0);
+ if ((st = push_bytes(reader, temp, (const uint8_t*)prefix.buf, prefix.len)) ||
+ (st = push_bytes(reader, temp, (const uint8_t*)suffix.buf, suffix.len))) {
+ return st;
+ }
+
+ // Replace the destination with the new expanded node
+ const size_t total_size = serd_node_total_size(temp);
+
+ memmove(dest, temp, total_size);
+
+ serd_stack_pop_to(&reader->stack,
+ string_start_offset + serd_node_length(dest));
+
return SERD_SUCCESS;
}
@@ -919,14 +1008,15 @@ read_number(SerdReader* const reader,
static SerdStatus
read_iri(SerdReader* const reader, SerdNode** const dest, bool* const ate_dot)
{
- switch (peek_byte(reader)) {
- case '<':
+ if (peek_byte(reader) == '<') {
return read_IRIREF(reader, dest);
- default:
- *dest = push_node(reader, SERD_CURIE, "", 0);
- return *dest ? read_PrefixedName(reader, *dest, true, ate_dot)
- : SERD_ERR_OVERFLOW;
}
+
+ if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) {
+ return SERD_ERR_OVERFLOW;
+ }
+
+ return read_PrefixedName(reader, *dest, true, ate_dot, reader->stack.size);
}
static SerdStatus
@@ -1018,7 +1108,8 @@ read_verb(SerdReader* reader, SerdNode** dest)
return SERD_ERR_OVERFLOW;
}
- SerdStatus st = read_PN_PREFIX(reader, *dest);
+ const size_t string_start_offset = reader->stack.size;
+ SerdStatus st = read_PN_PREFIX(reader, *dest);
if (st > SERD_FAILURE) {
return st;
}
@@ -1034,7 +1125,9 @@ read_verb(SerdReader* reader, SerdNode** dest)
: SERD_ERR_OVERFLOW);
}
- if ((st = read_PrefixedName(reader, *dest, false, &ate_dot)) || ate_dot) {
+ if ((st = read_PrefixedName(
+ reader, *dest, false, &ate_dot, string_start_offset)) ||
+ ate_dot) {
*dest = NULL;
return r_err(
reader, st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX, "expected verb");
@@ -1231,7 +1324,7 @@ read_object(SerdReader* const reader,
case '\'':
ret = read_literal(reader, &o, ate_dot);
break;
- default:
+ default: {
/* Either a boolean literal, or a qname. Read the prefix first, and if
it is in fact a "true" or "false" literal, produce that instead.
*/
@@ -1239,6 +1332,7 @@ read_object(SerdReader* const reader,
return SERD_ERR_OVERFLOW;
}
+ const size_t string_start_offset = reader->stack.size;
while (!(ret = read_PN_CHARS_BASE(reader, o))) {
}
@@ -1256,11 +1350,13 @@ read_object(SerdReader* const reader,
ret = SERD_SUCCESS;
}
} else if ((ret = read_PN_PREFIX_tail(reader, o)) > SERD_FAILURE ||
- (ret = read_PrefixedName(reader, o, false, ate_dot))) {
+ (ret = read_PrefixedName(
+ reader, o, false, ate_dot, string_start_offset))) {
ret = (ret > SERD_FAILURE) ? ret : SERD_ERR_BAD_SYNTAX;
return r_err(reader, ret, "expected prefixed name");
}
}
+ }
if (!ret && emit && simple && o) {
ret = emit_statement(reader, *ctx, o);
@@ -1569,6 +1665,7 @@ read_prefixID(SerdReader* const reader, const bool sparql, const bool token)
read_ws_star(reader);
st = eat_byte_check(reader, '.');
}
+
return st;
}
diff --git a/src/reader.h b/src/reader.h
index 2374d7de..acef8ce2 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -164,10 +164,7 @@ push_byte(SerdReader* reader, SerdNode* node, const int c)
}
static inline SerdStatus
-push_bytes(SerdReader* reader,
- SerdNode* ref,
- const uint8_t* bytes,
- unsigned len)
+push_bytes(SerdReader* reader, SerdNode* ref, const uint8_t* bytes, size_t len)
{
const bool has_space = reader->stack.buf_size >= reader->stack.size + len;
if (has_space) {
diff --git a/src/string.c b/src/string.c
index 13fb9263..97c1432b 100644
--- a/src/string.c
+++ b/src/string.c
@@ -60,6 +60,8 @@ serd_strerror(const SerdStatus status)
return "Unexpected end of input";
case SERD_ERR_BAD_CALL:
return "Invalid call";
+ case SERD_ERR_BAD_URI:
+ return "Invalid or unresolved URI";
}
return "Unknown error";
diff --git a/test/test_overflow.c b/test/test_overflow.c
index 7f08112d..02b71008 100644
--- a/test/test_overflow.c
+++ b/test/test_overflow.c
@@ -21,7 +21,7 @@
#include <assert.h>
#include <stdio.h>
-static const size_t min_stack_size = 4 * sizeof(size_t) + 256u;
+static const size_t min_stack_size = 4 * sizeof(size_t) + 230u;
static const size_t max_stack_size = 1024u;
static SerdStatus
@@ -90,11 +90,9 @@ static void
test_turtle_overflow(void)
{
static const char* const test_strings[] = {
- "<http://example.org/s> <http://example.org/p> :%99 .",
"<http://example.org/s> <http://example.org/p> <http://example.org/> .",
"<http://example.org/s> <http://example.org/p> "
"<thisisanabsurdlylongurischeme://because/testing/> .",
- "<http://example.org/s> <http://example.org/p> eg:foo .",
"<http://example.org/s> <http://example.org/p> 1234 .",
"<http://example.org/s> <http://example.org/p> (1 2 3 4) .",
"<http://example.org/s> <http://example.org/p> (((((((42))))))) .",
@@ -112,7 +110,41 @@ test_turtle_overflow(void)
"@prefix ug.dot: <http://example.org/> . \nug.dot:s ug.dot:p ug.dot:o .\n",
// NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
- "@prefix øøøøøøøøø: <http://example.org/long> . \n"
+ "<http://example.org/subject/with/a/long/path> "
+ "<http://example.org/predicate/with/a/long/path> "
+ "<http://example.org/object/with/a/long/path> .",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "<http://example.org/s> <http://example.org/p> "
+ "\"typed\"^^<http://example.org/Datatype> .",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix eg: <http://example.org/ns/test> .\n"
+ "<http://example.org/s> <http://example.org/p> "
+ "\"typed\"^^eg:Datatype .",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix eg: <http://example.org/ns/test> .\n"
+ "<http://example.org/s> <http://example.org/p> eg:foo .",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix prefix: <http://example.org/testing/curies> .\n"
+ "prefix:subject prefix:predicate prefix:object .\n",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix eg: <http://example.org/> .\n"
+ "eg:s eg:p [ eg:p [ eg:p [ eg:p [ eg:p []]]]] .\n",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix eg: <http://example.org/> .\n"
+ "eg:s eg:p ( 1 2 3 ( 4 5 6 ( 7 8 9 ) ) ) .\n",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix eg: <http://example.org/ns/test> .\n"
+ "<http://example.org/s> <http://example.org/p> eg:%99 .",
+
+ // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+ "@prefix øøøøøøøøø: <http://example.org/long> .\n"
"<http://example.org/somewhatlongsubjecttooffsetthepredicate> øøøøøøøøø:p "
"øøøøøøøøø:o .\n",
@@ -140,8 +172,8 @@ test_turtle_overflow(void)
// NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
"@prefix prefix: <http://example.org/testing/curies> .\n"
- "prefix:subjectthatwillcomearoundtobeingfinishedanycharacternow "
- "prefix:predicate prefix:object .\n",
+ "<http://example.org/very/long/uri/subject/to/overflow/the/predicate> "
+ "prefix:predicate prefix:object ; prefix:p prefix:o .\n",
// NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
"@prefix eg: <http://example.org/> .\n"
diff --git a/test/test_string.c b/test/test_string.c
index 495138d8..2f805015 100644
--- a/test/test_string.c
+++ b/test/test_string.c
@@ -44,7 +44,7 @@ test_strerror(void)
{
const char* msg = serd_strerror(SERD_SUCCESS);
assert(!strcmp(msg, "Success"));
- for (int i = SERD_FAILURE; i <= SERD_ERR_BAD_CALL; ++i) {
+ for (int i = SERD_FAILURE; i <= SERD_ERR_BAD_URI; ++i) {
msg = serd_strerror((SerdStatus)i);
assert(strcmp(msg, "Success"));
}
diff --git a/test/test_writer.c b/test/test_writer.c
index dc1ebfcb..0ced87d6 100644
--- a/test/test_writer.c
+++ b/test/test_writer.c
@@ -286,6 +286,41 @@ test_write_empty_syntax(void)
serd_world_free(world);
}
+static void
+test_write_bad_uri(void)
+{ // Check that writing a statement containing the relative URI "rel" is rejected
+ SerdWorld* world = serd_world_new();
+ SerdNodes* nodes = serd_world_nodes(world);
+ SerdEnv* env = serd_env_new(SERD_EMPTY_STRING()); // empty string argument, presumably meaning no base URI; TODO confirm
+
+ const SerdNode* s =
+ serd_nodes_uri(nodes, SERD_STRING("http://example.org/s"));
+
+ const SerdNode* p =
+ serd_nodes_uri(nodes, SERD_STRING("http://example.org/p"));
+
+ const SerdNode* rel = serd_nodes_uri(nodes, SERD_STRING("rel")); // URI node with no scheme
+
+ SerdBuffer buffer = {NULL, 0};
+ SerdByteSink* byte_sink = serd_byte_sink_new_buffer(&buffer); // output is collected in `buffer`
+
+ SerdWriter* writer =
+ serd_writer_new(world, SERD_NTRIPLES, 0u, env, byte_sink); // N-Triples writer with no flags set
+
+ assert(writer);
+
+ const SerdStatus st =
+ serd_sink_write(serd_writer_sink(writer), 0u, s, p, rel, NULL); // `rel` is passed after s and p
+ assert(st); // the write must fail...
+ assert(st == SERD_ERR_BAD_ARG); // ...specifically with SERD_ERR_BAD_ARG
+
+ serd_free(serd_buffer_sink_finish(&buffer)); // free whatever the buffer sink returns
+ serd_writer_free(writer);
+ serd_byte_sink_free(byte_sink);
+ serd_env_free(env);
+ serd_world_free(world);
+}
+
int
main(void)
{
@@ -295,6 +330,7 @@ main(void)
test_strict_write();
test_write_error();
test_write_empty_syntax();
+ test_write_bad_uri();
return 0;
}