diff options
author | David Robillard <d@drobilla.net> | 2017-06-29 12:21:09 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2017-06-29 12:21:09 -0400 |
commit | 195e4bcff3c4dfd3fe8bbf0df57d53ce89ca99e8 (patch) | |
tree | 952b067c7ace8afb1f78cb51ac032d18565c94a8 /src | |
parent | 21211d73053d0a66a1da601472c68598cfc53595 (diff) | |
download | serd-195e4bcff3c4dfd3fe8bbf0df57d53ce89ca99e8.tar.gz serd-195e4bcff3c4dfd3fe8bbf0df57d53ce89ca99e8.tar.bz2 serd-195e4bcff3c4dfd3fe8bbf0df57d53ce89ca99e8.zip |
Fix strict parsing of absolute URI schemes
Diffstat (limited to 'src')
-rw-r--r-- | src/reader.c | 43 | ||||
-rw-r--r-- | src/serd_internal.h | 11 | ||||
-rw-r--r-- | src/uri.c | 12 |
3 files changed, 41 insertions, 25 deletions
diff --git a/src/reader.c b/src/reader.c index 29526223..54e2724a 100644 --- a/src/reader.c +++ b/src/reader.c @@ -773,35 +773,44 @@ read_LANGTAG(SerdReader* reader) return ref; } -typedef enum { PREFIX, GOOD, BAD} SchemeState; - -static inline bool -check_scheme(SerdReader* reader, uint8_t c, SchemeState* state) +static bool +read_IRIREF_scheme(SerdReader* reader, Ref dest) { - if (!supports_relative_iris(reader) && *state == PREFIX) { - if (c == ':') { - *state = GOOD; - } else if (!isalpha(c)) { - *state = BAD; + uint8_t c = peek_byte(reader); + if (!isalpha(c)) { + return r_err(reader, SERD_ERR_BAD_SYNTAX, + "bad IRI scheme start `%c'\n", c); + } + + while ((c = peek_byte(reader))) { + if (c == '>') { + return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing IRI scheme\n"); + } else if (!is_uri_scheme_char(c)) { return r_err(reader, SERD_ERR_BAD_SYNTAX, - "syntax does not support relative IRIs\n"); + "bad IRI scheme char `%X'\n", c); + } + + push_byte(reader, dest, eat_byte_safe(reader, c)); + if (c == ':') { + return true; // End of scheme } } - return true; + + return false; } static Ref read_IRIREF(SerdReader* reader) { TRY_RET(eat_byte_check(reader, '<')); - Ref ref = push_node(reader, SERD_URI, "", 0); - SchemeState scheme = PREFIX; - uint32_t code; + Ref ref = push_node(reader, SERD_URI, "", 0); + if (!supports_relative_iris(reader) && !read_IRIREF_scheme(reader, ref)) { + return pop_node(reader, ref); + } + + uint32_t code; while (true) { const uint8_t c = peek_byte(reader); - if (!check_scheme(reader, c, &scheme)) { - return pop_node(reader, ref); - } switch (c) { case '"': case '<': case '^': case '`': case '{': case '|': case '}': r_err(reader, SERD_ERR_BAD_SYNTAX, diff --git a/src/serd_internal.h b/src/serd_internal.h index affdd31f..297b4507 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -368,6 +368,17 @@ uri_is_under(const SerdURI* uri, const SerdURI* root) return true; } +static inline bool +is_uri_scheme_char(const uint8_t c) +{ + 
switch (c) { + case ':': case '+': case '-': case '.': + return true; + default: + return is_alpha(c) || is_digit(c); + } +} + /* Error reporting */ static inline void @@ -103,16 +103,12 @@ serd_uri_string_has_scheme(const uint8_t* utf8) if (!utf8 || !is_alpha(utf8[0])) { return false; // Invalid scheme initial character, URI is relative } + for (uint8_t c; (c = *++utf8) != '\0';) { - switch (c) { - case ':': + if (!is_uri_scheme_char(c)) { + return false; + } else if (c == ':') { return true; // End of scheme - case '+': case '-': case '.': - break; // Valid scheme character, continue - default: - if (!is_alpha(c) && !is_digit(c)) { - return false; // Invalid scheme character - } } } |