From f93c3fdd6c7d6ca61bec55d3c1ffae7e7c793913 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Wed, 29 Mar 2023 19:59:50 -0400 Subject: Fix relative URI creation --- NEWS | 3 ++- src/uri.c | 14 +++++----- src/uri_utils.h | 54 ++++++++++++++++++++++++------------- test/pretty/manifest.ttl | 7 +++++ test/pretty/relative-uris.ttl | 2 ++ test/root/root-inside-base-out.ttl | 4 +-- test/root/root-outside-base-out.ttl | 4 +-- test/test_uri.c | 13 --------- 8 files changed, 58 insertions(+), 43 deletions(-) create mode 100644 test/pretty/relative-uris.ttl diff --git a/NEWS b/NEWS index 0c1ca82e..9b654a35 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,7 @@ serd (0.31.0) unstable; urgency=medium * Fix incorrect parsing of strange quote escape patterns * Fix possible hang when writing nested Turtle lists * Fix potential memory leaks when a write is aborted + * Fix relative URI creation * Gracefully handle bad characters in Turtle blank node syntax * Gracefully handle bad characters in Turtle datatype syntax * Improve TriG pretty-printing and remove trailing newlines @@ -20,7 +21,7 @@ serd (0.31.0) unstable; urgency=medium * Replace duplicated dox_to_sphinx script with sphinxygen dependency * Test header for warnings more strictly - -- David Robillard Sun, 26 Mar 2023 23:59:39 +0000 + -- David Robillard Wed, 29 Mar 2023 23:55:55 +0000 serd (0.30.16) stable; urgency=medium diff --git a/src/uri.c b/src/uri.c index 8acb79b3..dcdd8083 100644 --- a/src/uri.c +++ b/src/uri.c @@ -1,4 +1,4 @@ -// Copyright 2011-2020 David Robillard +// Copyright 2011-2023 David Robillard // SPDX-License-Identifier: ISC #include "string_utils.h" @@ -428,10 +428,6 @@ write_rel_path(SerdSink sink, len += sink("../", 3, stream); } - if (last_shared_sep == 0 && up == 0) { - len += sink("/", 1, stream); - } - // Write suffix return len + write_path_tail(sink, stream, uri, last_shared_sep + 1); } @@ -468,8 +464,12 @@ serd_uri_serialise_relative(const SerdURI* const uri, if (uri->authority.buf) { len += sink("//", 2, stream); len += sink(uri->authority.buf, uri->authority.len, stream); - if (uri->authority.len > 0 && - uri->authority.buf[uri->authority.len - 1] != '/' && + + const bool authority_ends_with_slash = + (uri->authority.len > 0 && + uri->authority.buf[uri->authority.len - 1] == '/'); + + if (!authority_ends_with_slash && serd_uri_path_starts_without_slash(uri)) { // Special case: ensure path begins with a slash // https://tools.ietf.org/html/rfc3986#section-3.2 diff --git a/src/uri_utils.h b/src/uri_utils.h index 16819191..e2f30edb 100644 --- a/src/uri_utils.h +++ b/src/uri_utils.h @@ -12,6 +12,11 @@ #include #include +typedef struct { + size_t shared; + size_t root; +} SlashIndexes; + static inline bool chunk_equals(const SerdChunk* a, const SerdChunk* b) { @@ -33,51 +38,64 @@ uri_path_at(const SerdURI* uri, size_t i) } /** - Return the index of the first differing character after the last root slash, - or zero if `uri` is not under `root`. + Return the index of the last slash shared with the root, or `SIZE_MAX`. + + The index of the next slash found in the root is also returned, so the two + can be compared to determine if the URI is within the root (if the shared + slash is the last in the root, then the URI is a child of the root, + otherwise it may merely share some leading path components). */ -static inline SERD_PURE_FUNC size_t +static inline SERD_PURE_FUNC SlashIndexes uri_rooted_index(const SerdURI* uri, const SerdURI* root) { + SlashIndexes indexes = {SIZE_MAX, SIZE_MAX}; + if (!root || !root->scheme.len || !chunk_equals(&root->scheme, &uri->scheme) || !chunk_equals(&root->authority, &uri->authority)) { - return 0; + return indexes; } - bool differ = false; - const size_t path_len = uri_path_len(uri); - const size_t root_len = uri_path_len(root); - size_t last_root_slash = 0; - for (size_t i = 0; i < path_len && i < root_len; ++i) { + const size_t path_len = uri_path_len(uri); + const size_t root_len = uri_path_len(root); + const size_t min_len = path_len < root_len ? path_len : root_len; + for (size_t i = 0; i < min_len; ++i) { const uint8_t u = uri_path_at(uri, i); const uint8_t r = uri_path_at(root, i); - differ = differ || u != r; - if (r == '/') { - last_root_slash = i; - if (differ) { - return 0; + if (u == r) { + if (u == '/') { + indexes.root = indexes.shared = i; } + } else { + for (size_t j = i; j < root_len; ++j) { + if (uri_path_at(root, j) == '/') { + indexes.root = j; + break; + } + } + + return indexes; } } - return last_root_slash + 1; + return indexes; } /** Return true iff `uri` shares path components with `root` */ static inline SERD_PURE_FUNC bool uri_is_related(const SerdURI* uri, const SerdURI* root) { - return uri_rooted_index(uri, root) > 0; + return uri_rooted_index(uri, root).shared != SIZE_MAX; } /** Return true iff `uri` is within the base of `root` */ static inline SERD_PURE_FUNC bool uri_is_under(const SerdURI* uri, const SerdURI* root) { - const size_t index = uri_rooted_index(uri, root); - return index > 0 && uri->path.len > index; + const SlashIndexes indexes = uri_rooted_index(uri, root); + return indexes.shared && indexes.shared != SIZE_MAX && + indexes.shared == indexes.root; } static inline bool diff --git a/test/pretty/manifest.ttl b/test/pretty/manifest.ttl index 1f653d65..69b951af 100644 --- a/test/pretty/manifest.ttl +++ b/test/pretty/manifest.ttl @@ -32,6 +32,7 @@ <#many-objects> <#named-graph> <#nested-list-object> + <#relative-uris> <#short-string-escapes> <#uri-escapes> <#nested-list-subject> @@ -193,6 +194,12 @@ mf:name "nested-list-subject" ; mf:result . +<#relative-uris> + a rdft:TestTurtleEval ; + mf:action ; + mf:name "relative-uris" ; + mf:result . + <#short-string-escapes> a rdft:TestTurtleEval ; mf:action ; diff --git a/test/pretty/relative-uris.ttl b/test/pretty/relative-uris.ttl new file mode 100644 index 00000000..397c7cf7 --- /dev/null +++ b/test/pretty/relative-uris.ttl @@ -0,0 +1,2 @@ + + . diff --git a/test/root/root-inside-base-out.ttl b/test/root/root-inside-base-out.ttl index a5081e4d..76f61791 100644 --- a/test/root/root-inside-base-out.ttl +++ b/test/root/root-inside-base-out.ttl @@ -1,7 +1,7 @@ @base . @prefix rdfs: . - +<> rdfs:label "base node" . @@ -10,7 +10,7 @@ rdfs:label "within root" . - + rdfs:label "within base" . diff --git a/test/root/root-outside-base-out.ttl b/test/root/root-outside-base-out.ttl index 47a4c47e..dcb95d5a 100644 --- a/test/root/root-outside-base-out.ttl +++ b/test/root/root-outside-base-out.ttl @@ -4,7 +4,7 @@ <> rdfs:label "base node" . - +<../> rdfs:label "root node" . @@ -13,7 +13,7 @@ <../s> rdfs:label "within root" . -<../../s> + rdfs:label "outside root" . diff --git a/test/test_uri.c b/test/test_uri.c index bcd670e5..ac24a2be 100644 --- a/test/test_uri.c +++ b/test/test_uri.c @@ -265,12 +265,8 @@ test_relative_uri(void) // Related base - /* Expected: check_relative_uri( "http://example.org/a/b", "http://example.org/", NULL, "a/b"); - Actual: */ - check_relative_uri( - "http://example.org/a/b", "http://example.org/", NULL, "/a/b"); check_relative_uri( "http://example.org/a/b", "http://example.org/a/", NULL, "b"); @@ -286,10 +282,7 @@ test_relative_uri(void) check_relative_uri("http://example.org/", "http://example.org/", NULL, ""); - /* Expected: check_relative_uri("http://example.org/", "http://example.org/a", NULL, ""); - Actual: */ - check_relative_uri("http://example.org/", "http://example.org/a", NULL, "/"); check_relative_uri( "http://example.org/", "http://example.org/a/", NULL, "../"); @@ -337,10 +330,7 @@ test_relative_uri(void) check_relative_uri("http://example.org/a/", "http://example.org/a/", "http://example.org/a/", - /* Expected: ""); - Actual: */ - "http://example.org/a/"); check_relative_uri("http://example.org/a/b", "http://example.org/a/b/c", @@ -350,10 +340,7 @@ test_relative_uri(void) check_relative_uri("http://example.org/a", "http://example.org/a/b/c", "http://example.org/a/b", - /* Expected: "http://example.org/a"); - Actual: */ - "../../a"); } int -- cgit v1.2.1