about summary refs log tree commit diff stats
path: root/src/uri.c
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2023-02-06 07:43:36 -0500
committer David Robillard <d@drobilla.net> 2023-12-02 18:49:08 -0500
commit b992fe3ef83e102a999084070214b8295f824f6a (patch)
tree 20f702125b4a257200bbcffbe178118b8d577352 /src/uri.c
parent 4cf33db925fbd8bea0defeb34e1ed6575349e644 (diff)
download serd-b992fe3ef83e102a999084070214b8295f824f6a.tar.gz
serd-b992fe3ef83e102a999084070214b8295f824f6a.tar.bz2
serd-b992fe3ef83e102a999084070214b8295f824f6a.zip
Reduce complexity of URI parsing code
Diffstat (limited to 'src/uri.c')
-rw-r--r-- src/uri.c 197
1 files changed, 97 insertions, 100 deletions
diff --git a/src/uri.c b/src/uri.c
index e7445377..6fc1f17c 100644
--- a/src/uri.c
+++ b/src/uri.c
@@ -5,6 +5,7 @@
#include "uri_utils.h"
#include "serd/buffer.h"
+#include "serd/status.h"
#include "serd/stream.h"
#include "serd/string_view.h"
#include "serd/uri.h"
@@ -16,11 +17,40 @@
#include <stdlib.h>
#include <string.h>
+static SerdStatus
+write_file_uri_char(const char c, void* const stream)
+{
+ return (serd_buffer_write(&c, 1, 1, stream) == 1) ? SERD_SUCCESS
+ : SERD_BAD_ALLOC;
+}
+
+static char*
+parse_hostname(const char* const authority, char** const hostname)
+{
+ char* const path = strchr(authority, '/');
+ if (!path) {
+ return NULL;
+ }
+
+ if (hostname) {
+ const size_t len = (size_t)(path - authority);
+ if (!(*hostname = (char*)calloc(len + 1, 1))) {
+ return NULL;
+ }
+
+ memcpy(*hostname, authority, len);
+ }
+
+ return path;
+}
+
char*
serd_parse_file_uri(const char* const uri, char** const hostname)
{
assert(uri);
+ SerdStatus st = SERD_SUCCESS;
+
const char* path = uri;
if (hostname) {
*hostname = NULL;
@@ -30,16 +60,8 @@ serd_parse_file_uri(const char* const uri, char** const hostname)
const char* auth = uri + 7;
if (*auth == '/') { // No hostname
path = auth;
- } else { // Has hostname
- if (!(path = strchr(auth, '/'))) {
- return NULL;
- }
-
- if (hostname) {
- const size_t len = (size_t)(path - auth);
- *hostname = (char*)calloc(len + 1, 1);
- memcpy(*hostname, auth, len);
- }
+ } else if (!(path = parse_hostname(auth, hostname))) {
+ return NULL;
}
}
@@ -48,26 +70,30 @@ serd_parse_file_uri(const char* const uri, char** const hostname)
}
SerdBuffer buffer = {NULL, 0};
- for (const char* s = path; *s; ++s) {
- if (*s == '%') {
- if (*(s + 1) == '%') {
- serd_buffer_write("%", 1, 1, &buffer);
+ for (const char* s = path; !st && *s; ++s) {
+ if (*s != '%') {
+ st = write_file_uri_char(*s, &buffer);
+ } else if (*(s + 1) == '%') {
+ if (!(st = write_file_uri_char('%', &buffer))) {
++s;
- } else if (is_hexdig(*(s + 1)) && is_hexdig(*(s + 2))) {
- const uint8_t hi = hex_digit_value((const uint8_t)s[1]);
- const uint8_t lo = hex_digit_value((const uint8_t)s[2]);
- const char c = (char)((hi << 4U) | lo);
- serd_buffer_write(&c, 1, 1, &buffer);
+ }
+ } else if (is_hexdig(*(s + 1)) && is_hexdig(*(s + 2))) {
+ const uint8_t hi = hex_digit_value((const uint8_t)s[1]);
+ const uint8_t lo = hex_digit_value((const uint8_t)s[2]);
+ const char c = (char)((hi << 4U) | lo);
+ if (!(st = write_file_uri_char(c, &buffer))) {
s += 2;
- } else {
- s += 2; // Junk escape, ignore
}
} else {
- serd_buffer_write(s, 1, 1, &buffer);
+ s += 2; // Junk escape, ignore
}
}
- serd_buffer_close(&buffer);
+ if (st || serd_buffer_close(&buffer)) {
+ free(buffer.buf);
+ return NULL;
+ }
+
return (char*)buffer.buf;
}
@@ -92,6 +118,24 @@ serd_uri_string_has_scheme(const char* const string)
return false;
}
+static inline bool
+is_uri_authority_char(const char c)
+{
+ return c && c != '/' && c != '?' && c != '#';
+}
+
+static inline bool
+is_uri_path_char(const char c)
+{
+ return c && c != '?' && c != '#';
+}
+
+static inline bool
+is_uri_query_char(const char c)
+{
+ return c && c != '#';
+}
+
SerdURIView
serd_parse_uri(const char* const string)
{
@@ -101,112 +145,65 @@ serd_parse_uri(const char* const string)
const char* ptr = string;
/* See http://tools.ietf.org/html/rfc3986#section-3
- URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
- */
+ URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] */
/* S3.1: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
if (is_alpha(*ptr)) {
for (char c = *++ptr; true; c = *++ptr) {
- switch (c) {
- case '\0':
- case '/':
- case '?':
- case '#':
- ptr = string;
- goto path; // Relative URI (starts with path by definition)
- case ':':
+ if (c == ':') {
result.scheme.data = string;
- result.scheme.length = (size_t)((ptr++) - string);
- goto maybe_authority; // URI with scheme
- case '+':
- case '-':
- case '.':
- continue;
- default:
- if (is_alpha(c) || is_digit(c)) {
- continue;
- }
+ result.scheme.length = (size_t)(ptr++ - string);
+ break;
+ }
+
+ if (!is_uri_scheme_char(c)) {
+ ptr = string;
+ break;
}
}
}
- /* S3.2: The authority component is preceded by a double slash ("//")
- and is terminated by the next slash ("/"), question mark ("?"),
- or number sign ("#") character, or by the end of the URI.
- */
-maybe_authority:
+ /* S3.2: The authority component is preceded by "//" and is terminated by the
+ next '/', '?', or '#', or by the end of the URI. */
if (*ptr == '/' && *(ptr + 1) == '/') {
ptr += 2;
result.authority.data = ptr;
- for (char c = 0; (c = *ptr) != '\0'; ++ptr) {
- switch (c) {
- case '/':
- goto path;
- case '?':
- goto query;
- case '#':
- goto fragment;
- default:
- ++result.authority.length;
- }
+ while (is_uri_authority_char(*ptr)) {
+ ++result.authority.length;
+ ++ptr;
}
}
- /* RFC3986 S3.3: The path is terminated by the first question mark ("?")
- or number sign ("#") character, or by the end of the URI.
- */
-path:
- switch (*ptr) {
- case '?':
- goto query;
- case '#':
- goto fragment;
- case '\0':
- goto end;
- default:
- break;
- }
- result.path.data = ptr;
- result.path.length = 0;
- for (char c = 0; (c = *ptr) != '\0'; ++ptr) {
- switch (c) {
- case '?':
- goto query;
- case '#':
- goto fragment;
- default:
+ /* S3.3: The path is terminated by the first '?' or '#', or by the end of the
+ URI. */
+ if (is_uri_path_char(*ptr)) {
+ result.path.data = ptr++;
+ result.path.length = 1U;
+ while (is_uri_path_char(*ptr)) {
++result.path.length;
+ ++ptr;
}
}
- /* RFC3986 S3.4: The query component is indicated by the first question
- mark ("?") character and terminated by a number sign ("#") character
- or by the end of the URI.
- */
-query:
+ /* S3.4: The query component is indicated by the first '?' and terminated by
+ a '#' or by the end of the URI. */
if (*ptr == '?') {
result.query.data = ++ptr;
- for (char c = 0; (c = *ptr) != '\0'; ++ptr) {
- if (c == '#') {
- goto fragment;
- }
+ while (is_uri_query_char(*ptr)) {
++result.query.length;
+ ++ptr;
}
}
- /* RFC3986 S3.5: A fragment identifier component is indicated by the
- presence of a number sign ("#") character and terminated by the end
- of the URI.
- */
-fragment:
+ /* S3.5: A fragment identifier component is indicated by the presence of a
+ '#' and terminated by the end of the URI. */
if (*ptr == '#') {
result.fragment.data = ptr;
- while (*ptr++ != '\0') {
+ while (*ptr++) {
++result.fragment.length;
}
}
-end:
return result;
}
@@ -495,7 +492,7 @@ serd_write_uri(const SerdURIView uri,
}
static bool
-is_uri_path_char(const char c)
+is_unescaped_uri_path_char(const char c)
{
if (is_alpha(c) || is_digit(c)) {
return true;
@@ -563,7 +560,7 @@ serd_write_file_uri(const SerdStringView path,
for (size_t i = 0; i < path.length; ++i) {
if (path.data[i] == '%') {
len += sink("%%", 1, 2, stream);
- } else if (is_uri_path_char(path.data[i])) {
+ } else if (is_unescaped_uri_path_char(path.data[i])) {
len += sink(path.data + i, 1, 1, stream);
#ifdef _WIN32
} else if (path.data[i] == '\\') {