diff options
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | src/reader.c | 43 | ||||
-rw-r--r-- | src/serd_internal.h | 11 | ||||
-rw-r--r-- | src/uri.c | 12 | ||||
-rw-r--r-- | tests/bad/bad-missing-uri-scheme.nt | 1 | ||||
-rw-r--r-- | tests/bad/bad-uri-scheme-start.nt | 1 | ||||
-rw-r--r-- | tests/bad/bad-uri-scheme.nt | 1 | ||||
-rw-r--r-- | tests/bad/bad-uri-truncated.nt | 1 | ||||
-rw-r--r-- | tests/good/test-uri.nt | 3 | ||||
-rw-r--r-- | tests/good/test-uri.ttl | 3 | ||||
-rw-r--r-- | wscript | 7 |
11 files changed, 57 insertions, 29 deletions
@@ -1,8 +1,9 @@ serd (0.27.1) unstable; * Add support for reading from a user provided callback + * Fix strict parsing of absolute URI schemes - -- David Robillard <d@drobilla.net> Mon, 24 Apr 2017 19:06:08 +0200 + -- David Robillard <d@drobilla.net> Thu, 29 Jun 2017 12:20:40 -0400 serd (0.26.0) stable; diff --git a/src/reader.c b/src/reader.c index 29526223..54e2724a 100644 --- a/src/reader.c +++ b/src/reader.c @@ -773,35 +773,44 @@ read_LANGTAG(SerdReader* reader) return ref; } -typedef enum { PREFIX, GOOD, BAD} SchemeState; - -static inline bool -check_scheme(SerdReader* reader, uint8_t c, SchemeState* state) +static bool +read_IRIREF_scheme(SerdReader* reader, Ref dest) { - if (!supports_relative_iris(reader) && *state == PREFIX) { - if (c == ':') { - *state = GOOD; - } else if (!isalpha(c)) { - *state = BAD; + uint8_t c = peek_byte(reader); + if (!isalpha(c)) { + return r_err(reader, SERD_ERR_BAD_SYNTAX, + "bad IRI scheme start `%c'\n", c); + } + + while ((c = peek_byte(reader))) { + if (c == '>') { + return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing IRI scheme\n"); + } else if (!is_uri_scheme_char(c)) { return r_err(reader, SERD_ERR_BAD_SYNTAX, - "syntax does not support relative IRIs\n"); + "bad IRI scheme char `%X'\n", c); + } + + push_byte(reader, dest, eat_byte_safe(reader, c)); + if (c == ':') { + return true; // End of scheme } } - return true; + + return false; } static Ref read_IRIREF(SerdReader* reader) { TRY_RET(eat_byte_check(reader, '<')); - Ref ref = push_node(reader, SERD_URI, "", 0); - SchemeState scheme = PREFIX; - uint32_t code; + Ref ref = push_node(reader, SERD_URI, "", 0); + if (!supports_relative_iris(reader) && !read_IRIREF_scheme(reader, ref)) { + return pop_node(reader, ref); + } + + uint32_t code; while (true) { const uint8_t c = peek_byte(reader); - if (!check_scheme(reader, c, &scheme)) { - return pop_node(reader, ref); - } switch (c) { case '"': case '<': case '^': case '`': case '{': case '|': case '}': r_err(reader, 
SERD_ERR_BAD_SYNTAX, diff --git a/src/serd_internal.h b/src/serd_internal.h index affdd31f..297b4507 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -368,6 +368,17 @@ uri_is_under(const SerdURI* uri, const SerdURI* root) return true; } +static inline bool +is_uri_scheme_char(const uint8_t c) +{ + switch (c) { + case ':': case '+': case '-': case '.': + return true; + default: + return is_alpha(c) || is_digit(c); + } +} + /* Error reporting */ static inline void @@ -103,16 +103,12 @@ serd_uri_string_has_scheme(const uint8_t* utf8) if (!utf8 || !is_alpha(utf8[0])) { return false; // Invalid scheme initial character, URI is relative } + for (uint8_t c; (c = *++utf8) != '\0';) { - switch (c) { - case ':': + if (!is_uri_scheme_char(c)) { + return false; + } else if (c == ':') { return true; // End of scheme - case '+': case '-': case '.': - break; // Valid scheme character, continue - default: - if (!is_alpha(c) && !is_digit(c)) { - return false; // Invalid scheme character - } } } diff --git a/tests/bad/bad-missing-uri-scheme.nt b/tests/bad/bad-missing-uri-scheme.nt new file mode 100644 index 00000000..5d7bc724 --- /dev/null +++ b/tests/bad/bad-missing-uri-scheme.nt @@ -0,0 +1 @@ +<foo> <bar> <baz> . diff --git a/tests/bad/bad-uri-scheme-start.nt b/tests/bad/bad-uri-scheme-start.nt new file mode 100644 index 00000000..cd3fd70f --- /dev/null +++ b/tests/bad/bad-uri-scheme-start.nt @@ -0,0 +1 @@ +<2http://example.org/s> <http://example.org/p> <http://example.org/o> . diff --git a/tests/bad/bad-uri-scheme.nt b/tests/bad/bad-uri-scheme.nt new file mode 100644 index 00000000..1329edcd --- /dev/null +++ b/tests/bad/bad-uri-scheme.nt @@ -0,0 +1 @@ +<b@d://example.org/s> <http://example.org/p> <http://example.org/o> . diff --git a/tests/bad/bad-uri-truncated.nt b/tests/bad/bad-uri-truncated.nt new file mode 100644 index 00000000..22d29e4b --- /dev/null +++ b/tests/bad/bad-uri-truncated.nt @@ -0,0 +1 @@ +<foo
\ No newline at end of file diff --git a/tests/good/test-uri.nt b/tests/good/test-uri.nt index 6c56f594..1744963c 100644 --- a/tests/good/test-uri.nt +++ b/tests/good/test-uri.nt @@ -41,5 +41,8 @@ <http://a/b/c/d;p?aquery> <http://www.w3.org/2002/07/owl#sameAs> <http://a/b/c/d;p?aquery> . <http://a/b/c/d;p?q#afragment> <http://www.w3.org/2002/07/owl#sameAs> <http://a/b/c/d;p?q#afragment> . <http://a/> <http://www.w3.org/2002/07/owl#sameAs> <http://a/> . +<view-source://foo> <http://www.w3.org/2002/07/owl#sameAs> <view-source://foo> . +<coap+tcp://foo> <http://www.w3.org/2002/07/owl#sameAs> <coap+tcp://foo> . +<osc.udp://foo> <http://www.w3.org/2002/07/owl#sameAs> <osc.udp://foo> . <http://B/foo> <http://www.w3.org/2002/07/owl#sameAs> <http://B/foo> . <http://C/bar> <http://www.w3.org/2002/07/owl#sameAs> <http://C/bar> . diff --git a/tests/good/test-uri.ttl b/tests/good/test-uri.ttl index cf43a38b..b6a8d967 100644 --- a/tests/good/test-uri.ttl +++ b/tests/good/test-uri.ttl @@ -58,6 +58,9 @@ <?aquery> owl:sameAs <http://a/b/c/d;p?aquery> . <#afragment> owl:sameAs <http://a/b/c/d;p?q#afragment> . <../../../../../../> owl:sameAs <http://a/> . +<view-source://foo> owl:sameAs <view-source://foo> . +<coap+tcp://foo> owl:sameAs <coap+tcp://foo> . +<osc.udp://foo> owl:sameAs <osc.udp://foo> . @base <http://B?bquery> . @@ -392,7 +392,7 @@ def test(ctx): os.chdir(orig_dir) os.chdir(srcdir) - bad_tests = glob.glob('tests/bad/*.ttl') + bad_tests = glob.glob('tests/bad/*.ttl') + glob.glob('tests/bad/*.nt') bad_tests.sort() os.chdir(orig_dir) @@ -483,8 +483,9 @@ def test(ctx): for lax in ['', '-l']: autowaf.run_test( ctx, APPNAME, - 'serdi_static %s -q "%s" "%s" > %s.out' % ( - lax, os.path.join(srcdir, test), test_base(test), test), + 'serdi_static %s -i %s -q "%s" "%s" > %s.out' % ( + lax, 'turtle' if test.endswith('.ttl') else 'ntriples', + os.path.join(srcdir, test), test_base(test), test), 1, name=test) autowaf.end_tests(ctx, APPNAME, 'bad') |