diff options
-rw-r--r-- | include/serd/reader.h | 2 | ||||
-rw-r--r-- | src/env.c | 15 | ||||
-rw-r--r-- | src/env.h | 8 | ||||
-rw-r--r-- | src/node.c | 2 | ||||
-rw-r--r-- | src/node.h | 3 | ||||
-rw-r--r-- | src/read_turtle.c | 119 | ||||
-rw-r--r-- | src/reader.h | 24 | ||||
-rw-r--r-- | test/extra/bad/bad-prefix-dot.ttl | 1 | ||||
-rw-r--r-- | test/extra/bad/manifest.ttl | 8 | ||||
-rw-r--r-- | test/extra/full/full-uris.ttl | 6 | ||||
-rw-r--r-- | test/extra/good/manifest.ttl | 14 | ||||
-rw-r--r-- | test/extra/good/test-local-name-ends-with-dot.nt | 1 | ||||
-rw-r--r-- | test/extra/good/test-local-name-escapes.nt | 17 | ||||
-rw-r--r-- | test/extra/good/test-local-name-escapes.ttl | 19 | ||||
-rw-r--r-- | test/meson.build | 1 | ||||
-rw-r--r-- | test/test_overflow.c | 44 | ||||
-rw-r--r-- | test/test_reader_writer.c | 2 | ||||
-rw-r--r-- | test/test_writer.c | 31 |
18 files changed, 282 insertions, 35 deletions
diff --git a/include/serd/reader.h b/include/serd/reader.h index 658193fb..b6b9cac3 100644 --- a/include/serd/reader.h +++ b/include/serd/reader.h @@ -32,6 +32,8 @@ typedef struct SerdReaderImpl SerdReader; typedef enum { SERD_READ_LAX = 1U << 0U, ///< Tolerate invalid input where possible SERD_READ_VARIABLES = 1U << 1U, ///< Support variable nodes + SERD_READ_PREFIXED = 1U << 2U, ///< Do not expand prefixed names + SERD_READ_RELATIVE = 1U << 3U, ///< Do not expand relative URI references } SerdReaderFlag; /// Bitwise OR of SerdReaderFlag values @@ -139,6 +139,21 @@ serd_env_set_base_uri(SerdEnv* const env, const SerdStringView uri) return SERD_SUCCESS; } +SerdStringView +serd_env_find_prefix(const SerdEnv* const env, const SerdStringView name) +{ + for (size_t i = 0; i < env->n_prefixes; ++i) { + const SerdNode* const prefix_name = env->prefixes[i].name; + if (prefix_name->length == name.length) { + if (!memcmp(serd_node_string(prefix_name), name.data, name.length)) { + return serd_node_string_view(env->prefixes[i].uri); + } + } + } + + return serd_empty_string(); +} + ZIX_PURE_FUNC static SerdPrefix* serd_env_find(const SerdEnv* const env, const char* const name, @@ -21,6 +21,14 @@ serd_env_qualify_in_place(const SerdEnv* env, SerdStringView* suffix); /** + Return the URI for the prefix with the given name. + + If no such prefix is known, returns an empty string view. +*/ +ZIX_PURE_FUNC SerdStringView +serd_env_find_prefix(const SerdEnv* env, SerdStringView name); + +/** Expand `curie`. Errors: SERD_BAD_ARG if `curie` is not valid, or SERD_BAD_CURIE if prefix is @@ -104,7 +104,7 @@ serd_node_check_padding(const SerdNode* node) #endif } -static ZIX_PURE_FUNC size_t +size_t serd_node_total_size(const SerdNode* const node) { return node ? (sizeof(SerdNode) + serd_node_pad_length(node->length) + @@ -54,6 +54,9 @@ void serd_node_set(SerdNode* ZIX_NONNULL* ZIX_NONNULL dst, const SerdNode* ZIX_NONNULL src); +ZIX_PURE_FUNC size_t +serd_node_total_size(const SerdNode* ZIX_NULLABLE node); + void serd_node_zero_pad(SerdNode* ZIX_NONNULL node); diff --git a/src/read_turtle.c b/src/read_turtle.c index 8d9ec78a..fa7b9731 100644 --- a/src/read_turtle.c +++ b/src/read_turtle.c @@ -3,6 +3,7 @@ #include "read_turtle.h" #include "byte_source.h" +#include "env.h" #include "namespaces.h" #include "node.h" #include "ntriples.h" @@ -21,6 +22,8 @@ #include "serd/statement.h" #include "serd/status.h" #include "serd/string_view.h" +#include "serd/uri.h" +#include "zix/attributes.h" #include <assert.h> #include <stdbool.h> @@ -292,6 +295,66 @@ read_PN_PREFIX(SerdReader* const reader, SerdNode* const dest) return st ? st : read_PN_PREFIX_tail(reader, dest); } +typedef struct { + SerdReader* reader; + SerdNode* node; + SerdStatus status; +} WriteNodeContext; + +static size_t +write_to_stack(const void* const ZIX_NONNULL buf, + const size_t size, + const size_t nmemb, + void* const ZIX_NONNULL stream) +{ + WriteNodeContext* const ctx = (WriteNodeContext*)stream; + const uint8_t* const utf8 = (const uint8_t*)buf; + + ctx->status = push_bytes(ctx->reader, ctx->node, utf8, nmemb * size); + + return nmemb; +} + +static SerdStatus +resolve_IRIREF(SerdReader* const reader, + SerdNode* const dest, + const size_t string_start_offset) +{ + // If the URI is already absolute, we don't need to do anything + if (serd_uri_string_has_scheme(serd_node_string(dest))) { + return SERD_SUCCESS; + } + + // Parse the URI reference so we can resolve it + SerdURIView uri = serd_parse_uri(serd_node_string(dest)); + + // Resolve relative URI reference to a full URI + uri = serd_resolve_uri(uri, serd_env_base_uri_view(reader->env)); + if (!uri.scheme.length) { + return r_err(reader, + SERD_BAD_SYNTAX, + "failed to resolve relative URI reference <%s>", + serd_node_string(dest)); + } + + // Push a new temporary node for constructing the resolved URI + SerdNode* const temp = push_node(reader, SERD_URI, "", 0); + if (!temp) { + return SERD_BAD_STACK; + } + + // Write resolved URI to the temporary node + WriteNodeContext ctx = {reader, temp, SERD_SUCCESS}; + temp->length = serd_write_uri(uri, write_to_stack, &ctx); + if (!ctx.status) { + // Replace the destination with the new expanded node + memmove(dest, temp, serd_node_total_size(temp)); + serd_stack_pop_to(&reader->stack, string_start_offset + dest->length); + } + + return ctx.status; +} + static SerdStatus read_IRIREF(SerdReader* const reader, SerdNode** const dest) { @@ -302,14 +365,24 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest) return SERD_BAD_STACK; } - return read_IRIREF_suffix(reader, *dest); + const size_t string_start_offset = reader->stack.size; + + st = read_IRIREF_suffix(reader, *dest); + if (!tolerate_status(reader, st)) { + return st; + } + + return (reader->flags & SERD_READ_RELATIVE) + ? SERD_SUCCESS + : resolve_IRIREF(reader, *dest, string_start_offset); } static SerdStatus read_PrefixedName(SerdReader* const reader, SerdNode* const dest, const bool read_prefix, - bool* const ate_dot) + bool* const ate_dot, + const size_t string_start_offset) { SerdStatus st = SERD_SUCCESS; if (read_prefix) { @@ -320,8 +393,24 @@ read_PrefixedName(SerdReader* const reader, return SERD_FAILURE; } - TRY(st, push_byte(reader, dest, eat_byte_safe(reader, ':'))); - TRY_FAILING(st, read_PN_LOCAL(reader, dest, ate_dot)); + skip_byte(reader, ':'); + + // Search environment for the prefix URI + const SerdStringView prefix = serd_node_string_view(dest); + const SerdStringView prefix_uri = serd_env_find_prefix(reader->env, prefix); + if (!prefix_uri.length) { + return r_err(reader, st, "unknown prefix \"%s\"", prefix.data); + } + + // Pop back to the start of the string + serd_stack_pop_to(&reader->stack, string_start_offset); + dest->length = 0U; + dest->type = SERD_URI; + push_bytes(reader, dest, (const uint8_t*)prefix_uri.data, prefix_uri.length); + if ((st = read_PN_LOCAL(reader, dest, ate_dot)) > SERD_FAILURE) { + return st; + } + return SERD_SUCCESS; } @@ -420,14 +509,15 @@ read_turtle_iri(SerdReader* const reader, SerdNode** const dest, bool* const ate_dot) { - switch (peek_byte(reader)) { - case '<': + if (peek_byte(reader) == '<') { return read_IRIREF(reader, dest); - default: - *dest = push_node(reader, SERD_CURIE, "", 0); - return *dest ? read_PrefixedName(reader, *dest, true, ate_dot) - : SERD_BAD_STACK; } + + if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) { + return SERD_BAD_STACK; + } + + return read_PrefixedName(reader, *dest, true, ate_dot, reader->stack.size); } static SerdStatus @@ -481,7 +571,8 @@ read_verb(SerdReader* reader, SerdNode** const dest) return SERD_BAD_STACK; } - SerdStatus st = SERD_SUCCESS; + const size_t string_start_offset = reader->stack.size; + SerdStatus st = SERD_SUCCESS; TRY_LAX(st, read_PN_PREFIX(reader, *dest)); bool ate_dot = false; @@ -495,7 +586,9 @@ read_verb(SerdReader* reader, SerdNode** const dest) : SERD_BAD_STACK); } - if ((st = read_PrefixedName(reader, *dest, false, &ate_dot)) || ate_dot) { + if ((st = read_PrefixedName( + reader, *dest, false, &ate_dot, string_start_offset)) || + ate_dot) { *dest = NULL; return r_err( reader, st > SERD_FAILURE ? st : SERD_BAD_SYNTAX, "expected verb"); @@ -586,7 +679,7 @@ read_named_object(SerdReader* const reader, SerdStatus st = SERD_SUCCESS; // Attempt to read a prefixed name - st = read_PrefixedName(reader, node, true, ate_dot); + st = read_PrefixedName(reader, node, true, ate_dot, reader->stack.size); // Check if this is actually a special boolean node if (st == SERD_FAILURE && (node_has_string(node, true_string) || diff --git a/src/reader.h b/src/reader.h index 302f8c6f..a98d5ef8 100644 --- a/src/reader.h +++ b/src/reader.h @@ -178,19 +178,23 @@ push_byte(SerdReader* reader, SerdNode* node, const int c) } static inline SerdStatus -push_bytes(SerdReader* reader, - SerdNode* ref, - const uint8_t* bytes, - unsigned len) +push_bytes(SerdReader* const reader, + SerdNode* const node, + const uint8_t* const bytes, + const size_t len) { - const bool has_space = reader->stack.buf_size >= reader->stack.size + len; - if (has_space) { - for (unsigned i = 0; i < len; ++i) { - push_byte(reader, ref, bytes[i]); - } + if (reader->stack.buf_size < reader->stack.size + len) { + return SERD_BAD_STACK; + } + + const size_t begin = reader->stack.size - 1U; + for (unsigned i = 0U; i < len; ++i) { + reader->stack.buf[begin + i] = (char)bytes[i]; } - return has_space ? SERD_SUCCESS : SERD_BAD_STACK; + reader->stack.size += len; + node->length += len; + return SERD_SUCCESS; } #endif // SERD_SRC_READER_H diff --git a/test/extra/bad/bad-prefix-dot.ttl b/test/extra/bad/bad-prefix-dot.ttl new file mode 100644 index 00000000..7b02211f --- /dev/null +++ b/test/extra/bad/bad-prefix-dot.ttl @@ -0,0 +1 @@ +@prefix dotted.: <http://example.org/> . diff --git a/test/extra/bad/manifest.ttl b/test/extra/bad/manifest.ttl index cd4aee24..f930b812 100644 --- a/test/extra/bad/manifest.ttl +++ b/test/extra/bad/manifest.ttl @@ -74,8 +74,9 @@ <#bad-object2> <#bad-paths> <#bad-pn-escape> - <#bad-prefix-missing-colon> <#bad-prefix> + <#bad-prefix-dot> + <#bad-prefix-missing-colon> <#bad-quote-in-uri> <#bad-semicolon-after-subject> <#bad-string> @@ -436,6 +437,11 @@ mf:action <bad-prefix-missing-colon.ttl> ; mf:name "bad-prefix-missing-colon" . +<#bad-prefix-dot> + a rdft:TestTurtleNegativeSyntax ; + mf:action <bad-prefix-dot.ttl> ; + mf:name "bad-prefix-dot" . + <#bad-quote-in-uri> a rdft:TestTurtleNegativeSyntax ; mf:action <bad-quote-in-uri.ttl> ; diff --git a/test/extra/full/full-uris.ttl b/test/extra/full/full-uris.ttl index cfb048df..dd6c5144 100644 --- a/test/extra/full/full-uris.ttl +++ b/test/extra/full/full-uris.ttl @@ -1,7 +1,7 @@ @prefix eg: <http://example.org/> . <http://example.org/s1> - eg:prefixed false . + <http://example.org/prefixed> false . -eg:s2 - eg:prefixed true . +<http://example.org/s2> + <http://example.org/prefixed> true . diff --git a/test/extra/good/manifest.ttl b/test/extra/good/manifest.ttl index 7c356285..11a7ec5e 100644 --- a/test/extra/good/manifest.ttl +++ b/test/extra/good/manifest.ttl @@ -27,6 +27,8 @@ <#test-id> <#test-list-in-blank> <#test-list-subject> + <#test-local-name-ends-with-dot> + <#test-local-name-escapes> <#test-long-backspace-escape> <#test-long-delete-escape> <#test-long-form-feed-escape> @@ -169,6 +171,18 @@ mf:name "test-list-subject" ; mf:result <test-list-subject.nt> . +<#test-local-name-ends-with-dot> + a rdft:TestTurtleEval ; + mf:action <test-local-name-ends-with-dot.ttl> ; + mf:name "test-local-name-ends-with-dot" ; + mf:result <test-local-name-ends-with-dot.nt> . + +<#test-local-name-escapes> + a rdft:TestTurtleEval ; + mf:action <test-local-name-escapes.ttl> ; + mf:name "test-local-name-escapes" ; + mf:result <test-local-name-escapes.nt> . + <#test-long-backspace-escape> a rdft:TestTurtleEval ; mf:action <test-long-backspace-escape.ttl> ; diff --git a/test/extra/good/test-local-name-ends-with-dot.nt b/test/extra/good/test-local-name-ends-with-dot.nt new file mode 100644 index 00000000..3285348a --- /dev/null +++ b/test/extra/good/test-local-name-ends-with-dot.nt @@ -0,0 +1 @@ +<http://example.org/eg#s> <http://example.org/eg#p> <http://example.org/eg#foo.> . diff --git a/test/extra/good/test-local-name-escapes.nt b/test/extra/good/test-local-name-escapes.nt new file mode 100644 index 00000000..a6362d7a --- /dev/null +++ b/test/extra/good/test-local-name-escapes.nt @@ -0,0 +1,17 @@ +<http://example.org/s> <http://example.org/p> <http://example.org/o'> . +<http://example.org/s> <http://example.org/p> <http://example.org/o!> . +<http://example.org/s> <http://example.org/p> <http://example.org/o#> . +<http://example.org/s> <http://example.org/p> <http://example.org/o$> . +<http://example.org/s> <http://example.org/p> <http://example.org/o%> . +<http://example.org/s> <http://example.org/p> <http://example.org/o&> . +<http://example.org/s> <http://example.org/p> <http://example.org/o(> . +<http://example.org/s> <http://example.org/p> <http://example.org/o)> . +<http://example.org/s> <http://example.org/p> <http://example.org/o*> . +<http://example.org/s> <http://example.org/p> <http://example.org/o+> . +<http://example.org/s> <http://example.org/p> <http://example.org/o,> . +<http://example.org/s> <http://example.org/p> <http://example.org/o/> . +<http://example.org/s> <http://example.org/p> <http://example.org/o;> . +<http://example.org/s> <http://example.org/p> <http://example.org/o=> . +<http://example.org/s> <http://example.org/p> <http://example.org/o?> . +<http://example.org/s> <http://example.org/p> <http://example.org/o@> . +<http://example.org/s> <http://example.org/p> <http://example.org/o~> . diff --git a/test/extra/good/test-local-name-escapes.ttl b/test/extra/good/test-local-name-escapes.ttl new file mode 100644 index 00000000..8c5fce37 --- /dev/null +++ b/test/extra/good/test-local-name-escapes.ttl @@ -0,0 +1,19 @@ +@prefix eg: <http://example.org/> . + +eg:s eg:p eg:o\' . +eg:s eg:p eg:o\! . +eg:s eg:p eg:o\# . +eg:s eg:p eg:o\$ . +eg:s eg:p eg:o\% . +eg:s eg:p eg:o\& . +eg:s eg:p eg:o\( . +eg:s eg:p eg:o\) . +eg:s eg:p eg:o\* . +eg:s eg:p eg:o\+ . +eg:s eg:p eg:o\, . +eg:s eg:p eg:o\/ . +eg:s eg:p eg:o\; . +eg:s eg:p eg:o\= . +eg:s eg:p eg:o\? . +eg:s eg:p eg:o\@ . +eg:s eg:p eg:o\~ . diff --git a/test/meson.build b/test/meson.build index 01f75b5a..b01dde81 100644 --- a/test/meson.build +++ b/test/meson.build @@ -195,6 +195,7 @@ simple_command_tests = { ['-o'], ['-p'], ['-r'], + ['-s', '<foo> a <Bar> .'], ['-s'], ['-z'], ], diff --git a/test/test_overflow.c b/test/test_overflow.c index db20f734..9b1a6fb7 100644 --- a/test/test_overflow.c +++ b/test/test_overflow.c @@ -8,7 +8,7 @@ #include <assert.h> #include <stdio.h> -static const size_t min_stack_size = 4U * sizeof(size_t) + 240U; +static const size_t min_stack_size = 4U * sizeof(size_t) + 238U; static const size_t max_stack_size = 1024U; static SerdStatus @@ -89,11 +89,9 @@ static void test_turtle_overflow(void) { static const char* const test_strings[] = { - "<http://example.org/s> <http://example.org/p> :%99 .", "<http://example.org/s> <http://example.org/p> <http://example.org/> .", "<http://example.org/s> <http://example.org/p> " "<thisisanabsurdlylongurischeme://because/testing/> .", - "<http://example.org/s> <http://example.org/p> eg:foo .", "<http://example.org/s> <http://example.org/p> 1234 .", "<http://example.org/s> <http://example.org/p> (1 2 3 4) .", "<http://example.org/s> <http://example.org/p> (((((((42))))))) .", @@ -111,7 +109,41 @@ test_turtle_overflow(void) "@prefix ug.dot: <http://example.org/> . \nug.dot:s ug.dot:p ug.dot:o .\n", // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) - "@prefix øøøøøøøøø: <http://example.org/long> . \n" + "<http://example.org/subject/with/a/long/path> " + "<http://example.org/predicate/with/a/long/path> " + "<http://example.org/object/with/a/long/path> .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "<http://example.org/s> <http://example.org/p> " + "\"typed\"^^<http://example.org/Datatype> .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/ns/test> .\n" + "<http://example.org/s> <http://example.org/p> " + "\"typed\"^^eg:Datatype .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/ns/test> .\n" + "<http://example.org/s> <http://example.org/p> eg:foo .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix prefix: <http://example.org/testing/curies> .\n" + "prefix:subject prefix:predicate prefix:object .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/> .\n" + "eg:s eg:p [ eg:p [ eg:p [ eg:p [ eg:p []]]]] .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/> .\n" + "eg:s eg:p ( 1 2 3 ( 4 5 6 ( 7 8 9 ) ) ) .\n", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix eg: <http://example.org/ns/test> .\n" + "<http://example.org/s> <http://example.org/p> eg:%99 .", + + // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) + "@prefix øøøøøøøøø: <http://example.org/long> .\n" "<http://example.org/somewhatlongsubjecttooffsetthepredicate> øøøøøøøøø:p " "øøøøøøøøø:o .\n", @@ -139,8 +171,8 @@ test_turtle_overflow(void) // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) "@prefix prefix: <http://example.org/testing/curies> .\n" - "prefix:subjectthatwillcomearoundtobeingfinishedanycharacternow " - "prefix:predicate prefix:object .\n", + "<http://example.org/very/long/uri/subject/to/overflow/the/predicate> " + "prefix:predicate prefix:object ; prefix:p prefix:o .\n", // NOLINTNEXTLINE(bugprone-suspicious-missing-comma) "@prefix eg: <http://example.org/> .\n" diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c index 3a20bb7a..55c4b584 100644 --- a/test/test_reader_writer.c +++ b/test/test_reader_writer.c @@ -100,7 +100,7 @@ test_write_errors(void) SerdWorld* const world = serd_world_new(); ErrorContext ctx = {0U, 0U}; - const size_t max_offsets[] = {0, 373, 1911, 2003, 414}; + const size_t max_offsets[] = {0, 368, 1900, 1992, 413}; // Test errors at different offsets to hit different code paths for (unsigned s = 1; s <= (unsigned)SERD_TRIG; ++s) { diff --git a/test/test_writer.c b/test/test_writer.c index f76cc800..ee602044 100644 --- a/test/test_writer.c +++ b/test/test_writer.c @@ -406,6 +406,36 @@ test_write_pname_escapes(void) check_pname_escape((const char*)last_escape, "eg:s\n\teg:p eg:wx%C3%B7 .\n"); } +static void +test_write_bad_uri(void) +{ + SerdWorld* world = serd_world_new(); + SerdEnv* env = serd_env_new(serd_empty_string()); + SerdNode* s = serd_new_uri(serd_string("http://example.org/s")); + SerdNode* p = serd_new_uri(serd_string("http://example.org/p")); + SerdNode* rel = serd_new_uri(serd_string("rel")); + SerdBuffer buffer = {NULL, 0}; + SerdOutputStream output = serd_open_output_buffer(&buffer); + SerdWriter* writer = + serd_writer_new(world, SERD_NTRIPLES, 0U, env, &output, 1); + + assert(writer); + + const SerdStatus st = + serd_sink_write(serd_writer_sink(writer), 0U, s, p, rel, NULL); + assert(st); + assert(st == SERD_BAD_ARG); + + serd_writer_free(writer); + serd_close_output(&output); + serd_free(buffer.buf); + serd_node_free(rel); + serd_node_free(p); + serd_node_free(s); + serd_env_free(env); + serd_world_free(world); +} + int main(void) { @@ -418,6 +448,7 @@ main(void) test_writer_stack_overflow(); test_write_empty_syntax(); test_write_pname_escapes(); + test_write_bad_uri(); return 0; } |