aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2017-06-29 12:21:09 -0400
committer David Robillard <d@drobilla.net> 2017-06-29 12:21:09 -0400
commit 195e4bcff3c4dfd3fe8bbf0df57d53ce89ca99e8 (patch)
tree 952b067c7ace8afb1f78cb51ac032d18565c94a8 /src
parent 21211d73053d0a66a1da601472c68598cfc53595 (diff)
download serd-195e4bcff3c4dfd3fe8bbf0df57d53ce89ca99e8.tar.gz
serd-195e4bcff3c4dfd3fe8bbf0df57d53ce89ca99e8.tar.bz2
serd-195e4bcff3c4dfd3fe8bbf0df57d53ce89ca99e8.zip
Fix strict parsing of absolute URI schemes
Diffstat (limited to 'src')
-rw-r--r-- src/reader.c 43
-rw-r--r-- src/serd_internal.h 11
-rw-r--r-- src/uri.c 12
3 files changed, 41 insertions, 25 deletions
diff --git a/src/reader.c b/src/reader.c
index 29526223..54e2724a 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -773,35 +773,44 @@ read_LANGTAG(SerdReader* reader)
return ref;
}
-typedef enum { PREFIX, GOOD, BAD} SchemeState;
-
-static inline bool
-check_scheme(SerdReader* reader, uint8_t c, SchemeState* state)
+static bool
+read_IRIREF_scheme(SerdReader* reader, Ref dest)
{
- if (!supports_relative_iris(reader) && *state == PREFIX) {
- if (c == ':') {
- *state = GOOD;
- } else if (!isalpha(c)) {
- *state = BAD;
+ uint8_t c = peek_byte(reader);
+ if (!isalpha(c)) {
+ return r_err(reader, SERD_ERR_BAD_SYNTAX,
+ "bad IRI scheme start `%c'\n", c);
+ }
+
+ while ((c = peek_byte(reader))) {
+ if (c == '>') {
+ return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing IRI scheme\n");
+ } else if (!is_uri_scheme_char(c)) {
return r_err(reader, SERD_ERR_BAD_SYNTAX,
- "syntax does not support relative IRIs\n");
+ "bad IRI scheme char `%X'\n", c);
+ }
+
+ push_byte(reader, dest, eat_byte_safe(reader, c));
+ if (c == ':') {
+ return true; // End of scheme
}
}
- return true;
+
+ return false;
}
static Ref
read_IRIREF(SerdReader* reader)
{
TRY_RET(eat_byte_check(reader, '<'));
- Ref ref = push_node(reader, SERD_URI, "", 0);
- SchemeState scheme = PREFIX;
- uint32_t code;
+ Ref ref = push_node(reader, SERD_URI, "", 0);
+ if (!supports_relative_iris(reader) && !read_IRIREF_scheme(reader, ref)) {
+ return pop_node(reader, ref);
+ }
+
+ uint32_t code;
while (true) {
const uint8_t c = peek_byte(reader);
- if (!check_scheme(reader, c, &scheme)) {
- return pop_node(reader, ref);
- }
switch (c) {
case '"': case '<': case '^': case '`': case '{': case '|': case '}':
r_err(reader, SERD_ERR_BAD_SYNTAX,
diff --git a/src/serd_internal.h b/src/serd_internal.h
index affdd31f..297b4507 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -368,6 +368,17 @@ uri_is_under(const SerdURI* uri, const SerdURI* root)
return true;
}
+static inline bool
+is_uri_scheme_char(const uint8_t c)
+{
+ switch (c) {
+ case ':': case '+': case '-': case '.':
+ return true;
+ default:
+ return is_alpha(c) || is_digit(c);
+ }
+}
+
/* Error reporting */
static inline void
diff --git a/src/uri.c b/src/uri.c
index 6b4fc07e..fcea3b62 100644
--- a/src/uri.c
+++ b/src/uri.c
@@ -103,16 +103,12 @@ serd_uri_string_has_scheme(const uint8_t* utf8)
if (!utf8 || !is_alpha(utf8[0])) {
return false; // Invalid scheme initial character, URI is relative
}
+
for (uint8_t c; (c = *++utf8) != '\0';) {
- switch (c) {
- case ':':
+ if (!is_uri_scheme_char(c)) {
+ return false;
+ } else if (c == ':') {
return true; // End of scheme
- case '+': case '-': case '.':
- break; // Valid scheme character, continue
- default:
- if (!is_alpha(c) && !is_digit(c)) {
- return false; // Invalid scheme character
- }
}
}