about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- NEWS 3
-rw-r--r-- src/reader.c 43
-rw-r--r-- src/serd_internal.h 11
-rw-r--r-- src/uri.c 12
-rw-r--r-- tests/bad/bad-missing-uri-scheme.nt 1
-rw-r--r-- tests/bad/bad-uri-scheme-start.nt 1
-rw-r--r-- tests/bad/bad-uri-scheme.nt 1
-rw-r--r-- tests/bad/bad-uri-truncated.nt 1
-rw-r--r-- tests/good/test-uri.nt 3
-rw-r--r-- tests/good/test-uri.ttl 3
-rw-r--r-- wscript 7
11 files changed, 57 insertions, 29 deletions
diff --git a/NEWS b/NEWS
index 679604cc..5839d2a3 100644
--- a/NEWS
+++ b/NEWS
@@ -1,8 +1,9 @@
serd (0.27.1) unstable;
* Add support for reading from a user provided callback
+ * Fix strict parsing of absolute URI schemes
- -- David Robillard <d@drobilla.net> Mon, 24 Apr 2017 19:06:08 +0200
+ -- David Robillard <d@drobilla.net> Thu, 29 Jun 2017 12:20:40 -0400
serd (0.26.0) stable;
diff --git a/src/reader.c b/src/reader.c
index 29526223..54e2724a 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -773,35 +773,44 @@ read_LANGTAG(SerdReader* reader)
return ref;
}
-typedef enum { PREFIX, GOOD, BAD} SchemeState;
-
-static inline bool
-check_scheme(SerdReader* reader, uint8_t c, SchemeState* state)
+static bool
+read_IRIREF_scheme(SerdReader* reader, Ref dest)
{
- if (!supports_relative_iris(reader) && *state == PREFIX) {
- if (c == ':') {
- *state = GOOD;
- } else if (!isalpha(c)) {
- *state = BAD;
+ uint8_t c = peek_byte(reader);
+ if (!isalpha(c)) {
+ return r_err(reader, SERD_ERR_BAD_SYNTAX,
+ "bad IRI scheme start `%c'\n", c);
+ }
+
+ while ((c = peek_byte(reader))) {
+ if (c == '>') {
+ return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing IRI scheme\n");
+ } else if (!is_uri_scheme_char(c)) {
return r_err(reader, SERD_ERR_BAD_SYNTAX,
- "syntax does not support relative IRIs\n");
+ "bad IRI scheme char `%X'\n", c);
+ }
+
+ push_byte(reader, dest, eat_byte_safe(reader, c));
+ if (c == ':') {
+ return true; // End of scheme
}
}
- return true;
+
+ return false;
}
static Ref
read_IRIREF(SerdReader* reader)
{
TRY_RET(eat_byte_check(reader, '<'));
- Ref ref = push_node(reader, SERD_URI, "", 0);
- SchemeState scheme = PREFIX;
- uint32_t code;
+ Ref ref = push_node(reader, SERD_URI, "", 0);
+ if (!supports_relative_iris(reader) && !read_IRIREF_scheme(reader, ref)) {
+ return pop_node(reader, ref);
+ }
+
+ uint32_t code;
while (true) {
const uint8_t c = peek_byte(reader);
- if (!check_scheme(reader, c, &scheme)) {
- return pop_node(reader, ref);
- }
switch (c) {
case '"': case '<': case '^': case '`': case '{': case '|': case '}':
r_err(reader, SERD_ERR_BAD_SYNTAX,
diff --git a/src/serd_internal.h b/src/serd_internal.h
index affdd31f..297b4507 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -368,6 +368,17 @@ uri_is_under(const SerdURI* uri, const SerdURI* root)
return true;
}
+static inline bool
+is_uri_scheme_char(const uint8_t c)
+{
+ switch (c) {
+ case ':': case '+': case '-': case '.':
+ return true;
+ default:
+ return is_alpha(c) || is_digit(c);
+ }
+}
+
/* Error reporting */
static inline void
diff --git a/src/uri.c b/src/uri.c
index 6b4fc07e..fcea3b62 100644
--- a/src/uri.c
+++ b/src/uri.c
@@ -103,16 +103,12 @@ serd_uri_string_has_scheme(const uint8_t* utf8)
if (!utf8 || !is_alpha(utf8[0])) {
return false; // Invalid scheme initial character, URI is relative
}
+
for (uint8_t c; (c = *++utf8) != '\0';) {
- switch (c) {
- case ':':
+ if (!is_uri_scheme_char(c)) {
+ return false;
+ } else if (c == ':') {
return true; // End of scheme
- case '+': case '-': case '.':
- break; // Valid scheme character, continue
- default:
- if (!is_alpha(c) && !is_digit(c)) {
- return false; // Invalid scheme character
- }
}
}
diff --git a/tests/bad/bad-missing-uri-scheme.nt b/tests/bad/bad-missing-uri-scheme.nt
new file mode 100644
index 00000000..5d7bc724
--- /dev/null
+++ b/tests/bad/bad-missing-uri-scheme.nt
@@ -0,0 +1 @@
+<foo> <bar> <baz> .
diff --git a/tests/bad/bad-uri-scheme-start.nt b/tests/bad/bad-uri-scheme-start.nt
new file mode 100644
index 00000000..cd3fd70f
--- /dev/null
+++ b/tests/bad/bad-uri-scheme-start.nt
@@ -0,0 +1 @@
+<2http://example.org/s> <http://example.org/p> <http://example.org/o> .
diff --git a/tests/bad/bad-uri-scheme.nt b/tests/bad/bad-uri-scheme.nt
new file mode 100644
index 00000000..1329edcd
--- /dev/null
+++ b/tests/bad/bad-uri-scheme.nt
@@ -0,0 +1 @@
+<b@d://example.org/s> <http://example.org/p> <http://example.org/o> .
diff --git a/tests/bad/bad-uri-truncated.nt b/tests/bad/bad-uri-truncated.nt
new file mode 100644
index 00000000..22d29e4b
--- /dev/null
+++ b/tests/bad/bad-uri-truncated.nt
@@ -0,0 +1 @@
+<foo
\ No newline at end of file
diff --git a/tests/good/test-uri.nt b/tests/good/test-uri.nt
index 6c56f594..1744963c 100644
--- a/tests/good/test-uri.nt
+++ b/tests/good/test-uri.nt
@@ -41,5 +41,8 @@
<http://a/b/c/d;p?aquery> <http://www.w3.org/2002/07/owl#sameAs> <http://a/b/c/d;p?aquery> .
<http://a/b/c/d;p?q#afragment> <http://www.w3.org/2002/07/owl#sameAs> <http://a/b/c/d;p?q#afragment> .
<http://a/> <http://www.w3.org/2002/07/owl#sameAs> <http://a/> .
+<view-source://foo> <http://www.w3.org/2002/07/owl#sameAs> <view-source://foo> .
+<coap+tcp://foo> <http://www.w3.org/2002/07/owl#sameAs> <coap+tcp://foo> .
+<osc.udp://foo> <http://www.w3.org/2002/07/owl#sameAs> <osc.udp://foo> .
<http://B/foo> <http://www.w3.org/2002/07/owl#sameAs> <http://B/foo> .
<http://C/bar> <http://www.w3.org/2002/07/owl#sameAs> <http://C/bar> .
diff --git a/tests/good/test-uri.ttl b/tests/good/test-uri.ttl
index cf43a38b..b6a8d967 100644
--- a/tests/good/test-uri.ttl
+++ b/tests/good/test-uri.ttl
@@ -58,6 +58,9 @@
<?aquery> owl:sameAs <http://a/b/c/d;p?aquery> .
<#afragment> owl:sameAs <http://a/b/c/d;p?q#afragment> .
<../../../../../../> owl:sameAs <http://a/> .
+<view-source://foo> owl:sameAs <view-source://foo> .
+<coap+tcp://foo> owl:sameAs <coap+tcp://foo> .
+<osc.udp://foo> owl:sameAs <osc.udp://foo> .
@base <http://B?bquery> .
diff --git a/wscript b/wscript
index 7558d73b..3aff945c 100644
--- a/wscript
+++ b/wscript
@@ -392,7 +392,7 @@ def test(ctx):
os.chdir(orig_dir)
os.chdir(srcdir)
- bad_tests = glob.glob('tests/bad/*.ttl')
+ bad_tests = glob.glob('tests/bad/*.ttl') + glob.glob('tests/bad/*.nt')
bad_tests.sort()
os.chdir(orig_dir)
@@ -483,8 +483,9 @@ def test(ctx):
for lax in ['', '-l']:
autowaf.run_test(
ctx, APPNAME,
- 'serdi_static %s -q "%s" "%s" > %s.out' % (
- lax, os.path.join(srcdir, test), test_base(test), test),
+ 'serdi_static %s -i %s -q "%s" "%s" > %s.out' % (
+ lax, 'turtle' if test.endswith('.ttl') else 'ntriples',
+ os.path.join(srcdir, test), test_base(test), test),
1,
name=test)
autowaf.end_tests(ctx, APPNAME, 'bad')