aboutsummaryrefslogtreecommitdiffstats
path: root/src/uri.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/uri.c')
-rw-r--r--src/uri.c376
1 files changed, 181 insertions, 195 deletions
diff --git a/src/uri.c b/src/uri.c
index f40fde83..88e1bd1e 100644
--- a/src/uri.c
+++ b/src/uri.c
@@ -26,12 +26,13 @@
#include <string.h>
char*
-serd_file_uri_parse(const char* uri, char** hostname)
+serd_parse_file_uri(const char* uri, char** hostname)
{
const char* path = uri;
if (hostname) {
*hostname = NULL;
}
+
if (!strncmp(uri, "file://", 7)) {
const char* auth = uri + 7;
if (*auth == '/') { // No hostname
@@ -40,6 +41,7 @@ serd_file_uri_parse(const char* uri, char** hostname)
if (!(path = strchr(auth, '/'))) {
return NULL;
}
+
if (hostname) {
*hostname = (char*)calloc((size_t)(path - auth + 1), 1);
memcpy(*hostname, auth, (size_t)(path - auth));
@@ -69,36 +71,34 @@ serd_file_uri_parse(const char* uri, char** hostname)
serd_buffer_sink(s, 1, &buffer);
}
}
+
return serd_buffer_sink_finish(&buffer);
}
+/// RFC3986: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
bool
-serd_uri_string_has_scheme(const char* utf8)
+serd_uri_string_has_scheme(const char* const string)
{
- // RFC3986: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
- if (!utf8 || !is_alpha(utf8[0])) {
- return false; // Invalid scheme initial character, URI is relative
- }
-
- for (char c = 0; (c = *++utf8) != '\0';) {
- if (!is_uri_scheme_char(c)) {
- return false;
- }
+ if (is_alpha(string[0])) {
+ for (size_t i = 1; string[i]; ++i) {
+ if (!is_uri_scheme_char(string[i])) {
+ return false; // Non-scheme character before a ':'
+ }
- if (c == ':') {
- return true; // End of scheme
+ if (string[i] == ':') {
+ return true; // Valid scheme terminated by a ':'
+ }
}
}
- return false;
+ return false; // String doesn't start with a scheme
}
-SerdStatus
-serd_uri_parse(const char* utf8, SerdURIView* out)
+SerdURIView
+serd_parse_uri(const char* const string)
{
- *out = SERD_URI_NULL;
-
- const char* ptr = utf8;
+ SerdURIView result = SERD_URI_NULL;
+ const char* ptr = string;
/* See http://tools.ietf.org/html/rfc3986#section-3
URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
@@ -112,11 +112,11 @@ serd_uri_parse(const char* utf8, SerdURIView* out)
case '/':
case '?':
case '#':
- ptr = utf8;
+ ptr = string;
goto path; // Relative URI (starts with path by definition)
case ':':
- out->scheme.buf = utf8;
- out->scheme.len = (size_t)((ptr++) - utf8);
+ result.scheme.buf = string;
+ result.scheme.len = (size_t)((ptr++) - string);
goto maybe_authority; // URI with scheme
case '+':
case '-':
@@ -137,7 +137,7 @@ serd_uri_parse(const char* utf8, SerdURIView* out)
maybe_authority:
if (*ptr == '/' && *(ptr + 1) == '/') {
ptr += 2;
- out->authority.buf = ptr;
+ result.authority.buf = ptr;
for (char c = 0; (c = *ptr) != '\0'; ++ptr) {
switch (c) {
case '/':
@@ -147,7 +147,7 @@ maybe_authority:
case '#':
goto fragment;
default:
- ++out->authority.len;
+ ++result.authority.len;
}
}
}
@@ -166,8 +166,8 @@ path:
default:
break;
}
- out->path.buf = ptr;
- out->path.len = 0;
+ result.path.buf = ptr;
+ result.path.len = 0;
for (char c = 0; (c = *ptr) != '\0'; ++ptr) {
switch (c) {
case '?':
@@ -175,7 +175,7 @@ path:
case '#':
goto fragment;
default:
- ++out->path.len;
+ ++result.path.len;
}
}
@@ -185,12 +185,12 @@ path:
*/
query:
if (*ptr == '?') {
- out->query.buf = ++ptr;
+ result.query.buf = ++ptr;
for (char c = 0; (c = *ptr) != '\0'; ++ptr) {
if (c == '#') {
goto fragment;
}
- ++out->query.len;
+ ++result.query.len;
}
}
@@ -200,14 +200,14 @@ query:
*/
fragment:
if (*ptr == '#') {
- out->fragment.buf = ptr;
+ result.fragment.buf = ptr;
while (*ptr++ != '\0') {
- ++out->fragment.len;
+ ++result.fragment.len;
}
}
end:
- return SERD_SUCCESS;
+ return result;
}
/**
@@ -217,64 +217,33 @@ end:
@return A pointer to the new start of `path`
*/
static const char*
-remove_dot_segments(const char* path, size_t len, size_t* up)
+remove_dot_segments(const char* path, const size_t len, size_t* up)
{
- const char* begin = path;
- const char* const end = path + len;
-
*up = 0;
- while (begin < end) {
- switch (begin[0]) {
- case '.':
- switch (begin[1]) {
- case '/':
- begin += 2; // Chop leading "./"
- break;
- case '.':
- switch (begin[2]) {
- case '\0':
- ++*up;
- begin += 2; // Chop input ".."
- break;
- case '/':
- ++*up;
- begin += 3; // Chop leading "../"
- break;
- default:
- return begin;
- }
- break;
- case '\0':
- ++begin; // Chop input "."
- // fallthru
- default:
- return begin;
- }
- break;
- case '/':
- switch (begin[1]) {
- case '.':
- switch (begin[2]) {
- case '/':
- begin += 2; // Leading "/./" => "/"
- break;
- case '.':
- switch (begin[3]) {
- case '/':
- ++*up;
- begin += 3; // Leading "/../" => "/"
- }
- break;
- default:
- return begin;
- }
- } // else fall through
- default:
- return begin; // Finished chopping dot components
+
+ for (size_t i = 0; i < len;) {
+ const char* const p = path + i;
+ if (!strncmp(p, "./", 2)) {
+ i += 2; // Chop leading "./"
+ } else if (!strncmp(p, "..", 3)) {
+ ++*up;
+ i += 2; // Chop input ".."
+ } else if (!strncmp(p, "../", 3)) {
+ ++*up;
+ i += 3; // Chop leading "../"
+ } else if (!strncmp(p, ".", 2)) {
+ ++i; // Chop input "."
+ } else if (!strncmp(p, "/./", 3)) {
+ i += 2; // Leading "/./" => "/"
+ } else if (!strncmp(p, "/../", 4)) {
+ ++*up;
+ i += 3; // Leading "/../" => "/"
+ } else {
+ return p;
}
}
- return begin;
+ return path + len;
}
/// Merge `base` and `path` in-place
@@ -305,172 +274,189 @@ merge(SerdStringView* base, SerdStringView* path)
}
/// See http://tools.ietf.org/html/rfc3986#section-5.2.2
-void
-serd_uri_resolve(const SerdURIView* r, const SerdURIView* base, SerdURIView* t)
+SerdURIView
+serd_resolve_uri(const SerdURIView r, const SerdURIView base)
{
- if (!base->scheme.len) {
- *t = *r; // Don't resolve against non-absolute URIs
- return;
+ if (r.scheme.len || !base.scheme.len) {
+ return r; // No resolution necessary || possible (respectively)
}
- t->path_base.buf = NULL;
- t->path_base.len = 0;
- if (r->scheme.len) {
- *t = *r;
+ SerdURIView t = SERD_URI_NULL;
+
+ t.path_prefix.buf = NULL;
+ t.path_prefix.len = 0;
+
+ if (r.authority.len) {
+ t.authority = r.authority;
+ t.path = r.path;
+ t.query = r.query;
} else {
- if (r->authority.len) {
- t->authority = r->authority;
- t->path = r->path;
- t->query = r->query;
+ t.path = r.path;
+ if (!r.path.len) {
+ t.path_prefix = base.path;
+ t.query = r.query.len ? r.query : base.query;
} else {
- t->path = r->path;
- if (!r->path.len) {
- t->path_base = base->path;
- if (r->query.len) {
- t->query = r->query;
- } else {
- t->query = base->query;
- }
- } else {
- if (r->path.buf[0] != '/') {
- t->path_base = base->path;
- }
- merge(&t->path_base, &t->path);
- t->query = r->query;
+ if (r.path.buf[0] != '/') {
+ t.path_prefix = base.path;
}
- t->authority = base->authority;
+
+ merge(&t.path_prefix, &t.path);
+ t.query = r.query;
}
- t->scheme = base->scheme;
- t->fragment = r->fragment;
+
+ t.authority = base.authority;
}
+
+ t.scheme = base.scheme;
+ t.fragment = r.fragment;
+
+ return t;
}
-/** Write the path of `uri` starting at index `i` */
-static size_t
-write_path_tail(SerdSink sink, void* stream, const SerdURIView* uri, size_t i)
+SerdURIView
+serd_relative_uri(const SerdURIView uri, const SerdURIView base)
{
- size_t len = 0;
- if (i < uri->path_base.len) {
- len += sink(uri->path_base.buf + i, uri->path_base.len - i, stream);
+ if (!uri_is_related(&uri, &base)) {
+ return uri;
}
- if (uri->path.buf) {
- if (i < uri->path_base.len) {
- len += sink(uri->path.buf, uri->path.len, stream);
- } else {
- const size_t j = (i - uri->path_base.len);
- len += sink(uri->path.buf + j, uri->path.len - j, stream);
- }
- }
+ SerdURIView result = SERD_URI_NULL;
- return len;
-}
+ // Regardless of the path, the query and/or fragment come along
+ result.query = uri.query;
+ result.fragment = uri.fragment;
-/** Write the path of `uri` relative to the path of `base`. */
-static size_t
-write_rel_path(SerdSink sink,
- void* stream,
- const SerdURIView* uri,
- const SerdURIView* base)
-{
- const size_t path_len = uri_path_len(uri);
- const size_t base_len = uri_path_len(base);
+ const size_t path_len = uri_path_len(&uri);
+ const size_t base_len = uri_path_len(&base);
const size_t min_len = (path_len < base_len) ? path_len : base_len;
// Find the last separator common to both paths
size_t last_shared_sep = 0;
size_t i = 0;
- for (; i < min_len && uri_path_at(uri, i) == uri_path_at(base, i); ++i) {
- if (uri_path_at(uri, i) == '/') {
+ for (; i < min_len && uri_path_at(&uri, i) == uri_path_at(&base, i); ++i) {
+ if (uri_path_at(&uri, i) == '/') {
last_shared_sep = i;
}
}
- if (i == path_len && i == base_len) { // Paths are identical
- return 0;
+ // If the URI and base URI have identical paths, the relative path is empty
+ if (i == path_len && i == base_len) {
+ result.path.buf = uri.path.buf;
+ result.path.len = 0;
+ return result;
}
+ // Otherwise, we need to build the relative path out of string slices
+
// Find the number of up references ("..") required
size_t up = 0;
for (size_t s = last_shared_sep + 1; s < base_len; ++s) {
- if (uri_path_at(base, s) == '/') {
+ if (uri_path_at(&base, s) == '/') {
++up;
}
}
- // Write up references
- size_t len = 0;
- for (size_t u = 0; u < up; ++u) {
- len += sink("../", 3, stream);
+ if (up > 0) {
+ if (last_shared_sep < uri.path_prefix.len) {
+ return SERD_URI_NULL;
+ }
+
+ // Special representation: NULL buffer and len set to the depth
+ result.path_prefix.len = up;
}
- if (last_shared_sep == 0 && up == 0) {
- len += sink("/", 1, stream);
+ if (last_shared_sep < uri.path_prefix.len) {
+ result.path_prefix.buf = uri.path_prefix.buf + last_shared_sep + 1;
+ result.path_prefix.len = uri.path_prefix.len - last_shared_sep - 1;
+ result.path = uri.path;
+ } else {
+ result.path.buf = uri.path.buf + last_shared_sep + 1;
+ result.path.len = uri.path.len - last_shared_sep - 1;
}
- // Write suffix
- return len + write_path_tail(sink, stream, uri, last_shared_sep + 1);
+ return result;
}
-static uint8_t
-serd_uri_path_starts_without_slash(const SerdURIView* uri)
+SERD_API
+bool
+serd_uri_is_within(const SerdURIView uri, const SerdURIView base)
{
- return ((uri->path_base.len || uri->path.len) &&
- ((!uri->path_base.len || uri->path_base.buf[0] != '/') &&
- (!uri->path.len || uri->path.buf[0] != '/')));
+ if (!base.scheme.len || !slice_equals(&base.scheme, &uri.scheme) ||
+ !slice_equals(&base.authority, &uri.authority)) {
+ return false;
+ }
+
+ bool differ = false;
+ const size_t path_len = uri_path_len(&uri);
+ const size_t base_len = uri_path_len(&base);
+
+ size_t last_base_slash = 0;
+ for (size_t i = 0; i < path_len && i < base_len; ++i) {
+ const char u = uri_path_at(&uri, i);
+ const char b = uri_path_at(&base, i);
+
+ differ = differ || u != b;
+ if (b == '/') {
+ last_base_slash = i;
+ if (differ) {
+ return false;
+ }
+ }
+ }
+
+ for (size_t i = last_base_slash + 1; i < base_len; ++i) {
+ if (uri_path_at(&base, i) == '/') {
+ return false;
+ }
+ }
+
+ return true;
}
/// See http://tools.ietf.org/html/rfc3986#section-5.3
size_t
-serd_uri_serialise_relative(const SerdURIView* uri,
- const SerdURIView* base,
- const SerdURIView* root,
- SerdSink sink,
- void* stream)
+serd_write_uri(const SerdURIView uri, SerdSink sink, void* stream)
{
- size_t len = 0;
- const bool relative =
- root ? uri_is_under(uri, root) : uri_is_related(uri, base);
+ size_t len = 0;
- if (relative) {
- len = write_rel_path(sink, stream, uri, base);
+ if (uri.scheme.buf) {
+ len += sink(uri.scheme.buf, uri.scheme.len, stream);
+ len += sink(":", 1, stream);
}
- if (!relative || (!len && base->query.buf)) {
- if (uri->scheme.buf) {
- len += sink(uri->scheme.buf, uri->scheme.len, stream);
- len += sink(":", 1, stream);
+ if (uri.authority.buf) {
+ len += sink("//", 2, stream);
+ len += sink(uri.authority.buf, uri.authority.len, stream);
+
+ if (uri.authority.len > 0 && uri_path_len(&uri) > 0 &&
+ uri_path_at(&uri, 0) != '/') {
+ // Special case: ensure path begins with a slash
+ // https://tools.ietf.org/html/rfc3986#section-3.2
+ len += sink("/", 1, stream);
}
- if (uri->authority.buf) {
- len += sink("//", 2, stream);
- len += sink(uri->authority.buf, uri->authority.len, stream);
- if (uri->authority.len > 0 &&
- uri->authority.buf[uri->authority.len - 1] != '/' &&
- serd_uri_path_starts_without_slash(uri)) {
- // Special case: ensure path begins with a slash
- // https://tools.ietf.org/html/rfc3986#section-3.2
- len += sink("/", 1, stream);
- }
+ }
+
+ if (uri.path_prefix.buf) {
+ len += sink(uri.path_prefix.buf, uri.path_prefix.len, stream);
+ } else if (uri.path_prefix.len) {
+ for (size_t i = 0; i < uri.path_prefix.len; ++i) {
+ len += sink("../", 3, stream);
}
- len += write_path_tail(sink, stream, uri, 0);
}
- if (uri->query.buf) {
+ if (uri.path.buf) {
+ len += sink(uri.path.buf, uri.path.len, stream);
+ }
+
+ if (uri.query.buf) {
len += sink("?", 1, stream);
- len += sink(uri->query.buf, uri->query.len, stream);
+ len += sink(uri.query.buf, uri.query.len, stream);
}
- if (uri->fragment.buf) {
- // Note uri->fragment.buf includes the leading `#'
- len += sink(uri->fragment.buf, uri->fragment.len, stream);
+ if (uri.fragment.buf) {
+ // Note that uri.fragment.buf includes the leading `#'
+ len += sink(uri.fragment.buf, uri.fragment.len, stream);
}
return len;
}
-
-/// See http://tools.ietf.org/html/rfc3986#section-5.3
-size_t
-serd_uri_serialise(const SerdURIView* uri, SerdSink sink, void* stream)
-{
- return serd_uri_serialise_relative(uri, NULL, NULL, sink, stream);
-}