diff options
-rw-r--r-- | include/serd/serd.h | 3 | ||||
-rw-r--r-- | src/n3.c | 123 | ||||
-rw-r--r-- | src/reader.h | 5 | ||||
-rw-r--r-- | src/string.c | 2 | ||||
-rw-r--r-- | test/test_overflow.c | 44 | ||||
-rw-r--r-- | test/test_string.c | 2 | ||||
-rw-r--r-- | test/test_writer.c | 36 |
7 files changed, 191 insertions, 24 deletions
diff --git a/include/serd/serd.h b/include/serd/serd.h index 0db6205f..7be9395c 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -215,6 +215,7 @@ typedef enum { SERD_ERR_BAD_WRITE, ///< Error writing to file/stream SERD_ERR_NO_DATA, ///< Unexpected end of input SERD_ERR_BAD_CALL, ///< Invalid call + SERD_ERR_BAD_URI, ///< Invalid or unresolved URI } SerdStatus; /** @@ -1739,6 +1740,8 @@ typedef enum { SERD_READ_LAX = 1u << 0u, ///< Tolerate invalid input where possible SERD_READ_VARIABLES = 1u << 1u, ///< Support variable nodes SERD_READ_EXACT_BLANKS = 1u << 2u, ///< Allow clashes with generated blanks + SERD_READ_PREFIXED = 1u << 3u, ///< Do not expand prefixed names + SERD_READ_RELATIVE = 1u << 4u, ///< Do not expand relative URI references } SerdReaderFlag; /// Bitwise OR of SerdReaderFlag values @@ -15,6 +15,7 @@ */ #include "byte_source.h" +#include "env.h" #include "namespaces.h" #include "node.h" #include "reader.h" @@ -732,6 +733,64 @@ read_IRIREF_scheme(SerdReader* const reader, SerdNode* const dest) return SERD_FAILURE; } +typedef struct { + SerdReader* reader; + SerdNode* node; + SerdStatus status; +} WriteNodeContext; + +static size_t +write_to_stack(const void* const SERD_NONNULL buf, + const size_t size, + const size_t nmemb, + void* const SERD_NONNULL stream) +{ + WriteNodeContext* const ctx = (WriteNodeContext*)stream; + const uint8_t* const utf8 = (const uint8_t*)buf; + + ctx->status = push_bytes(ctx->reader, ctx->node, utf8, nmemb * size); + + return nmemb; +} + +static SerdStatus +resolve_IRIREF(SerdReader* const reader, + SerdNode* const dest, + const size_t string_start_offset) +{ + // If the URI is already absolute, we don't need to do anything + SerdURIView uri = serd_parse_uri(serd_node_string(dest)); + if (uri.scheme.len) { + return SERD_SUCCESS; + } + + // Resolve relative URI reference to a full URI + uri = serd_resolve_uri(uri, serd_env_base_uri_view(reader->env)); + if (!uri.scheme.len) { + return r_err(reader, + SERD_ERR_BAD_SYNTAX, + "failed to resolve relative URI reference <%s>", + serd_node_string(dest)); + } + + // Push a new temporary node for constructing the resolved URI + SerdNode* const temp = push_node(reader, SERD_URI, "", 0); + if (!temp) { + return SERD_ERR_OVERFLOW; + } + + // Write resolved URI to the temporary node + WriteNodeContext ctx = {reader, temp, SERD_SUCCESS}; + temp->length = serd_write_uri(uri, write_to_stack, &ctx); + if (!ctx.status) { + // Replace the destination with the new expanded node + memmove(dest, temp, serd_node_total_size(temp)); + serd_stack_pop_to(&reader->stack, string_start_offset + dest->length); + } + + return ctx.status; +} + static SerdStatus read_IRIREF(SerdReader* const reader, SerdNode** const dest) { @@ -744,6 +803,8 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest) return SERD_ERR_OVERFLOW; } + const size_t string_start_offset = reader->stack.size; + if (!fancy_syntax(reader) && (st = read_IRIREF_scheme(reader, *dest))) { return r_err(reader, st, "expected IRI scheme"); } @@ -757,7 +818,9 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest) return r_err( reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character `%c'", c); case '>': - return SERD_SUCCESS; + return (reader->flags & SERD_READ_RELATIVE) + ? SERD_SUCCESS + : resolve_IRIREF(reader, *dest, string_start_offset); case '\\': if (read_UCHAR(reader, *dest, &code)) { return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape"); @@ -810,7 +873,8 @@ static SerdStatus read_PrefixedName(SerdReader* const reader, SerdNode* const dest, const bool read_prefix, - bool* const ate_dot) + bool* const ate_dot, + const size_t string_start_offset) { SerdStatus st = SERD_SUCCESS; if (read_prefix && ((st = read_PN_PREFIX(reader, dest)) > SERD_FAILURE)) { @@ -822,10 +886,35 @@ read_PrefixedName(SerdReader* const reader, } if ((st = push_byte(reader, dest, eat_byte_safe(reader, ':'))) || - (st = read_PN_LOCAL(reader, dest, ate_dot)) > SERD_FAILURE) { + (st = read_PN_LOCAL(reader, dest, ate_dot)) > SERD_FAILURE || + (reader->flags & SERD_READ_PREFIXED)) { return st; } + // Expand to absolute URI + const SerdStringView curie = serd_node_string_view(dest); + SerdStringView prefix; + SerdStringView suffix; + if ((st = serd_env_expand_in_place(reader->env, curie, &prefix, &suffix))) { + return r_err( + reader, st, "failed to expand URI \"%s\"", serd_node_string(dest)); + } + + // Push a new temporary node for constructing the full URI + SerdNode* const temp = push_node(reader, SERD_URI, "", 0); + if ((st = push_bytes(reader, temp, (const uint8_t*)prefix.buf, prefix.len)) || + (st = push_bytes(reader, temp, (const uint8_t*)suffix.buf, suffix.len))) { + return st; + } + + // Replace the destination with the new expanded node + const size_t total_size = serd_node_total_size(temp); + + memmove(dest, temp, total_size); + + serd_stack_pop_to(&reader->stack, + string_start_offset + serd_node_length(dest)); + return SERD_SUCCESS; } @@ -919,14 +1008,15 @@ read_number(SerdReader* const reader, static SerdStatus read_iri(SerdReader* const reader, SerdNode** const dest, bool* const ate_dot) { - switch (peek_byte(reader)) { - case '<': + if (peek_byte(reader) == '<') { return read_IRIREF(reader, dest); - default: - *dest = push_node(reader, SERD_CURIE, "", 0); - return *dest ? read_PrefixedName(reader, *dest, true, ate_dot) - : SERD_ERR_OVERFLOW; } + + if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) { + return SERD_ERR_OVERFLOW; + } + + return read_PrefixedName(reader, *dest, true, ate_dot, reader->stack.size); } static SerdStatus @@ -1018,7 +1108,8 @@ read_verb(SerdReader* reader, SerdNode** dest) return SERD_ERR_OVERFLOW; } - SerdStatus st = read_PN_PREFIX(reader, *dest); + const size_t string_start_offset = reader->stack.size; + SerdStatus st = read_PN_PREFIX(reader, *dest); if (st > SERD_FAILURE) { return st; } @@ -1034,7 +1125,9 @@ read_verb(SerdReader* reader, SerdNode** dest) : SERD_ERR_OVERFLOW); } - if ((st = read_PrefixedName(reader, *dest, false, &ate_dot)) || ate_dot) { + if ((st = read_PrefixedName( + reader, *dest, false, &ate_dot, string_start_offset)) || + ate_dot) { *dest = NULL; return r_err( reader, st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX, "expected verb"); @@ -1231,7 +1324,7 @@ read_object(SerdReader* const reader, case '\'': ret = read_literal(reader, &o, ate_dot); break; - default: + default: { /* Either a boolean literal, or a qname. Read the prefix first, and if it is in fact a "true" or "false" literal, produce that instead. */ @@ -1239,6 +1332,7 @@ read_object(SerdReader* const reader, return SERD_ERR_OVERFLOW; } + const size_t string_start_offset = reader->stack.size; while (!(ret = read_PN_CHARS_BASE(reader, o))) { } @@ -1256,11 +1350,13 @@ read_object(SerdReader* const reader, ret = SERD_SUCCESS; } } else if ((ret = read_PN_PREFIX_tail(reader, o)) > SERD_FAILURE || - (ret = read_PrefixedName(reader, o, false, ate_dot))) { + (ret = read_PrefixedName( + reader, o, false, ate_dot, string_start_offset))) { ret = (ret > SERD_FAILURE) ? ret : SERD_ERR_BAD_SYNTAX; return r_err(reader, ret, "expected prefixed name"); } } + } if (!ret && emit && simple && o) { ret = emit_statement(reader, *ctx, o); @@ -1569,6 +1665,7 @@ read_prefixID(SerdReader* const reader, const bool sparql, const bool token) read_ws_star(reader); st = eat_byte_check(reader, '.'); } + return st; } diff --git a/src/reader.h b/src/reader.h index 2374d7de..acef8ce2 100644 --- a/src/reader.h +++ b/src/reader.h @@ -164,10 +164,7 @@ push_byte(SerdReader* reader, SerdNode* node, const int c) } static inline SerdStatus -push_bytes(SerdReader* reader, - SerdNode* ref, - const uint8_t* bytes, - unsigned len) +push_bytes(SerdReader* reader, SerdNode* ref, const uint8_t* bytes, size_t len) { const bool has_space = reader->stack.buf_size >= reader->stack.size + len; if (has_space) { diff --git a/src/string.c b/src/string.c index 13fb9263..97c1432b 100644 --- a/src/string.c +++ b/src/string.c @@ -60,6 +60,8 @@ serd_strerror(const SerdStatus status) return "Unexpected end of input"; case SERD_ERR_BAD_CALL: return "Invalid call"; + case SERD_ERR_BAD_URI: + return "Invalid or unresolved URI"; } return "Unknown error"; diff --git a/test/test_overflow.c b/test/test_overflow.c index 7f08112d..02b71008 100644 --- a/test/test_overflow.c +++ b/test/test_overflow.c @@ -21,7 +21,7 @@ #include <assert.h> #include <stdio.h> -static const size_t min_stack_size = 4 * sizeof(size_t) + 256u; +static const size_t min_stack_size = 4 * sizeof(size_t) + 230u; static const size_t max_stack_size = 1024u; static SerdStatus @@ -90,11 +90,9 @@ static void test_turtle_overflow(void) { static const char* const test_strings[] = { - "<http://example.org/s> <http://example.org/p> :%99 .", "<http://example.org/s> <http://example.org/p> <http://example.org/> .", "<http://example.org/s> <http://example.org/p> " "<thisisanabsurdlylongurischeme://because/testing/> .", - "<http://example.org/s> <http://example.org/p> eg:foo .", "<http://example.org/s> <http://example.org/p> 1234 .", "<http://example.org/s> <http://example.org/p> (1 2 3 4) .", "<http://example.org/s> <http://example.org/p> (((((((42))))))) .", @@ -112,7 +110,41 @@ test_turtle_overflow(void) "@prefix ug.dot: <http://example.org/> . \nug.dot:s ug.dot:p ug.dot:o .\n", // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) - "@prefix øøøøøøøøø: <http://example.org/long> . \n" + "<http://example.org/subject/with/a/long/path> " + "<http://example.org/predicate/with/a/long/path> " + "<http://example.org/object/with/a/long/path> .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "<http://example.org/s> <http://example.org/p> " + "\"typed\"^^<http://example.org/Datatype> .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/ns/test> .\n" + "<http://example.org/s> <http://example.org/p> " + "\"typed\"^^eg:Datatype .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/ns/test> .\n" + "<http://example.org/s> <http://example.org/p> eg:foo .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix prefix: <http://example.org/testing/curies> .\n" + "prefix:subject prefix:predicate prefix:object .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/> .\n" + "eg:s eg:p [ eg:p [ eg:p [ eg:p [ eg:p []]]]] .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/> .\n" + "eg:s eg:p ( 1 2 3 ( 4 5 6 ( 7 8 9 ) ) ) .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/ns/test> .\n" + "<http://example.org/s> <http://example.org/p> eg:%99 .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix øøøøøøøøø: <http://example.org/long> .\n" "<http://example.org/somewhatlongsubjecttooffsetthepredicate> øøøøøøøøø:p " "øøøøøøøøø:o .\n", @@ -140,8 +172,8 @@ test_turtle_overflow(void) // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) "@prefix prefix: <http://example.org/testing/curies> .\n" - "prefix:subjectthatwillcomearoundtobeingfinishedanycharacternow " - "prefix:predicate prefix:object .\n", + "<http://example.org/very/long/uri/subject/to/overflow/the/predicate> " + "prefix:predicate prefix:object ; prefix:p prefix:o .\n", // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) "@prefix eg: <http://example.org/> .\n" diff --git a/test/test_string.c b/test/test_string.c index 495138d8..2f805015 100644 --- a/test/test_string.c +++ b/test/test_string.c @@ -44,7 +44,7 @@ test_strerror(void) { const char* msg = serd_strerror(SERD_SUCCESS); assert(!strcmp(msg, "Success")); - for (int i = SERD_FAILURE; i <= SERD_ERR_BAD_CALL; ++i) { + for (int i = SERD_FAILURE; i <= SERD_ERR_BAD_URI; ++i) { msg = serd_strerror((SerdStatus)i); assert(strcmp(msg, "Success")); } diff --git a/test/test_writer.c b/test/test_writer.c index dc1ebfcb..0ced87d6 100644 --- a/test/test_writer.c +++ b/test/test_writer.c @@ -286,6 +286,41 @@ test_write_empty_syntax(void) serd_world_free(world); } +static void +test_write_bad_uri(void) +{ + SerdWorld* world = serd_world_new(); + SerdNodes* nodes = serd_world_nodes(world); + SerdEnv* env = serd_env_new(SERD_EMPTY_STRING()); + + const SerdNode* s = + serd_nodes_uri(nodes, SERD_STRING("http://example.org/s")); + + const SerdNode* p = + serd_nodes_uri(nodes, SERD_STRING("http://example.org/p")); + + const SerdNode* rel = serd_nodes_uri(nodes, SERD_STRING("rel")); + + SerdBuffer buffer = {NULL, 0}; + SerdByteSink* byte_sink = serd_byte_sink_new_buffer(&buffer); + + SerdWriter* writer = + serd_writer_new(world, SERD_NTRIPLES, 0u, env, byte_sink); + + assert(writer); + + const SerdStatus st = + serd_sink_write(serd_writer_sink(writer), 0u, s, p, rel, NULL); + assert(st); + assert(st == SERD_ERR_BAD_ARG); + + serd_free(serd_buffer_sink_finish(&buffer)); + serd_writer_free(writer); + serd_byte_sink_free(byte_sink); + serd_env_free(env); + serd_world_free(world); +} + int main(void) { @@ -295,6 +330,7 @@ main(void) test_strict_write(); test_write_error(); test_write_empty_syntax(); + test_write_bad_uri(); return 0; } |