From 195e4bcff3c4dfd3fe8bbf0df57d53ce89ca99e8 Mon Sep 17 00:00:00 2001
From: David Robillard <d@drobilla.net>
Date: Thu, 29 Jun 2017 12:21:09 -0400
Subject: Fix strict parsing of absolute URI schemes

---
 src/reader.c        | 43 ++++++++++++++++++++++++++-----------------
 src/serd_internal.h | 11 +++++++++++
 src/uri.c           | 12 ++++--------
 3 files changed, 41 insertions(+), 25 deletions(-)

(limited to 'src')

diff --git a/src/reader.c b/src/reader.c
index 29526223..54e2724a 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -773,35 +773,44 @@ read_LANGTAG(SerdReader* reader)
 	return ref;
 }
 
-typedef enum { PREFIX, GOOD, BAD} SchemeState;
-
-static inline bool
-check_scheme(SerdReader* reader, uint8_t c, SchemeState* state)
+static bool
+read_IRIREF_scheme(SerdReader* reader, Ref dest)
 {
-	if (!supports_relative_iris(reader) && *state == PREFIX) {
-		if (c == ':') {
-			*state = GOOD;
-		} else if (!isalpha(c)) {
-			*state = BAD;
+	uint8_t c = peek_byte(reader);
+	if (!isalpha(c)) {
+		return r_err(reader, SERD_ERR_BAD_SYNTAX,
+		             "bad IRI scheme start `%c'\n", c);
+	}
+
+	while ((c = peek_byte(reader))) {
+		if (c == '>') {
+			return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing IRI scheme\n");
+		} else if (!is_uri_scheme_char(c)) {
 			return r_err(reader, SERD_ERR_BAD_SYNTAX,
-			             "syntax does not support relative IRIs\n");
+			             "bad IRI scheme char `%X'\n", c);
+		}
+
+		push_byte(reader, dest, eat_byte_safe(reader, c));
+		if (c == ':') {
+			return true;  // End of scheme
 		}
 	}
-	return true;
+
+	return false;
 }
 
 static Ref
 read_IRIREF(SerdReader* reader)
 {
 	TRY_RET(eat_byte_check(reader, '<'));
-	Ref         ref    = push_node(reader, SERD_URI, "", 0);
-	SchemeState scheme = PREFIX;
-	uint32_t    code;
+	Ref ref = push_node(reader, SERD_URI, "", 0);
+	if (!supports_relative_iris(reader) && !read_IRIREF_scheme(reader, ref)) {
+		return pop_node(reader, ref);
+	}
+
+	uint32_t code;
 	while (true) {
 		const uint8_t c = peek_byte(reader);
-		if (!check_scheme(reader, c, &scheme)) {
-			return pop_node(reader, ref);
-		}
 		switch (c) {
 		case '"': case '<': case '^': case '`': case '{': case '|': case '}':
 			r_err(reader, SERD_ERR_BAD_SYNTAX,
diff --git a/src/serd_internal.h b/src/serd_internal.h
index affdd31f..297b4507 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -368,6 +368,17 @@ uri_is_under(const SerdURI* uri, const SerdURI* root)
 	return true;
 }
 
+static inline bool
+is_uri_scheme_char(const uint8_t c)
+{
+	switch (c) {
+	case ':': case '+': case '-': case '.':
+		return true;
+	default:
+		return is_alpha(c) || is_digit(c);
+	}
+}
+
 /* Error reporting */
 
 static inline void
diff --git a/src/uri.c b/src/uri.c
index 6b4fc07e..fcea3b62 100644
--- a/src/uri.c
+++ b/src/uri.c
@@ -103,16 +103,12 @@ serd_uri_string_has_scheme(const uint8_t* utf8)
 	if (!utf8 || !is_alpha(utf8[0])) {
 		return false;  // Invalid scheme initial character, URI is relative
 	}
+
 	for (uint8_t c; (c = *++utf8) != '\0';) {
-		switch (c) {
-		case ':':
+		if (!is_uri_scheme_char(c)) {
+			return false;
+		} else if (c == ':') {
 			return true;  // End of scheme
-		case '+': case '-': case '.':
-			break;  // Valid scheme character, continue
-		default:
-			if (!is_alpha(c) && !is_digit(c)) {
-				return false;  // Invalid scheme character
-			}
 		}
 	}
 
-- 
cgit v1.2.1