From 195e4bcff3c4dfd3fe8bbf0df57d53ce89ca99e8 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Thu, 29 Jun 2017 12:21:09 -0400 Subject: Fix strict parsing of absolute URI schemes --- NEWS | 3 ++- src/reader.c | 43 ++++++++++++++++++++++--------------- src/serd_internal.h | 11 ++++++++++ src/uri.c | 12 ++++------- tests/bad/bad-missing-uri-scheme.nt | 1 + tests/bad/bad-uri-scheme-start.nt | 1 + tests/bad/bad-uri-scheme.nt | 1 + tests/bad/bad-uri-truncated.nt | 1 + tests/good/test-uri.nt | 3 +++ tests/good/test-uri.ttl | 3 +++ wscript | 7 +++--- 11 files changed, 57 insertions(+), 29 deletions(-) create mode 100644 tests/bad/bad-missing-uri-scheme.nt create mode 100644 tests/bad/bad-uri-scheme-start.nt create mode 100644 tests/bad/bad-uri-scheme.nt create mode 100644 tests/bad/bad-uri-truncated.nt diff --git a/NEWS b/NEWS index 679604cc..5839d2a3 100644 --- a/NEWS +++ b/NEWS @@ -1,8 +1,9 @@ serd (0.27.1) unstable; * Add support for reading from a user provided callback + * Fix strict parsing of absolute URI schemes - -- David Robillard Mon, 24 Apr 2017 19:06:08 +0200 + -- David Robillard Thu, 29 Jun 2017 12:20:40 -0400 serd (0.26.0) stable; diff --git a/src/reader.c b/src/reader.c index 29526223..54e2724a 100644 --- a/src/reader.c +++ b/src/reader.c @@ -773,35 +773,44 @@ read_LANGTAG(SerdReader* reader) return ref; } -typedef enum { PREFIX, GOOD, BAD} SchemeState; - -static inline bool -check_scheme(SerdReader* reader, uint8_t c, SchemeState* state) +static bool +read_IRIREF_scheme(SerdReader* reader, Ref dest) { - if (!supports_relative_iris(reader) && *state == PREFIX) { - if (c == ':') { - *state = GOOD; - } else if (!isalpha(c)) { - *state = BAD; + uint8_t c = peek_byte(reader); + if (!isalpha(c)) { + return r_err(reader, SERD_ERR_BAD_SYNTAX, + "bad IRI scheme start `%c'\n", c); + } + + while ((c = peek_byte(reader))) { + if (c == '>') { + return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing IRI scheme\n"); + } else if (!is_uri_scheme_char(c)) { return
r_err(reader, SERD_ERR_BAD_SYNTAX, - "syntax does not support relative IRIs\n"); + "bad IRI scheme char `%X'\n", c); + } + + push_byte(reader, dest, eat_byte_safe(reader, c)); + if (c == ':') { + return true; // End of scheme } } - return true; + + return false; } static Ref read_IRIREF(SerdReader* reader) { TRY_RET(eat_byte_check(reader, '<')); - Ref ref = push_node(reader, SERD_URI, "", 0); - SchemeState scheme = PREFIX; - uint32_t code; + Ref ref = push_node(reader, SERD_URI, "", 0); + if (!supports_relative_iris(reader) && !read_IRIREF_scheme(reader, ref)) { + return pop_node(reader, ref); + } + + uint32_t code; while (true) { const uint8_t c = peek_byte(reader); - if (!check_scheme(reader, c, &scheme)) { - return pop_node(reader, ref); - } switch (c) { case '"': case '<': case '^': case '`': case '{': case '|': case '}': r_err(reader, SERD_ERR_BAD_SYNTAX, diff --git a/src/serd_internal.h b/src/serd_internal.h index affdd31f..297b4507 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -368,6 +368,17 @@ uri_is_under(const SerdURI* uri, const SerdURI* root) return true; } +static inline bool +is_uri_scheme_char(const uint8_t c) +{ + switch (c) { + case ':': case '+': case '-': case '.': + return true; + default: + return is_alpha(c) || is_digit(c); + } +} + /* Error reporting */ static inline void diff --git a/src/uri.c b/src/uri.c index 6b4fc07e..fcea3b62 100644 --- a/src/uri.c +++ b/src/uri.c @@ -103,16 +103,12 @@ serd_uri_string_has_scheme(const uint8_t* utf8) if (!utf8 || !is_alpha(utf8[0])) { return false; // Invalid scheme initial character, URI is relative } + for (uint8_t c; (c = *++utf8) != '\0';) { - switch (c) { - case ':': + if (!is_uri_scheme_char(c)) { + return false; + } else if (c == ':') { return true; // End of scheme - case '+': case '-': case '.': - break; // Valid scheme character, continue - default: - if (!is_alpha(c) && !is_digit(c)) { - return false; // Invalid scheme character - } } } diff --git 
a/tests/bad/bad-missing-uri-scheme.nt b/tests/bad/bad-missing-uri-scheme.nt new file mode 100644 index 00000000..5d7bc724 --- /dev/null +++ b/tests/bad/bad-missing-uri-scheme.nt @@ -0,0 +1 @@ + . diff --git a/tests/bad/bad-uri-scheme-start.nt b/tests/bad/bad-uri-scheme-start.nt new file mode 100644 index 00000000..cd3fd70f --- /dev/null +++ b/tests/bad/bad-uri-scheme-start.nt @@ -0,0 +1 @@ +<2http://example.org/s> . diff --git a/tests/bad/bad-uri-scheme.nt b/tests/bad/bad-uri-scheme.nt new file mode 100644 index 00000000..1329edcd --- /dev/null +++ b/tests/bad/bad-uri-scheme.nt @@ -0,0 +1 @@ + . diff --git a/tests/bad/bad-uri-truncated.nt b/tests/bad/bad-uri-truncated.nt new file mode 100644 index 00000000..22d29e4b --- /dev/null +++ b/tests/bad/bad-uri-truncated.nt @@ -0,0 +1 @@ + . . . + . + . + . . . diff --git a/tests/good/test-uri.ttl b/tests/good/test-uri.ttl index cf43a38b..b6a8d967 100644 --- a/tests/good/test-uri.ttl +++ b/tests/good/test-uri.ttl @@ -58,6 +58,9 @@ owl:sameAs . <#afragment> owl:sameAs . <../../../../../../> owl:sameAs . + owl:sameAs . + owl:sameAs . + owl:sameAs . @base . diff --git a/wscript b/wscript index 7558d73b..3aff945c 100644 --- a/wscript +++ b/wscript @@ -392,7 +392,7 @@ def test(ctx): os.chdir(orig_dir) os.chdir(srcdir) - bad_tests = glob.glob('tests/bad/*.ttl') + bad_tests = glob.glob('tests/bad/*.ttl') + glob.glob('tests/bad/*.nt') bad_tests.sort() os.chdir(orig_dir) @@ -483,8 +483,9 @@ def test(ctx): for lax in ['', '-l']: autowaf.run_test( ctx, APPNAME, - 'serdi_static %s -q "%s" "%s" > %s.out' % ( - lax, os.path.join(srcdir, test), test_base(test), test), + 'serdi_static %s -i %s -q "%s" "%s" > %s.out' % ( + lax, 'turtle' if test.endswith('.ttl') else 'ntriples', + os.path.join(srcdir, test), test_base(test), test), 1, name=test) autowaf.end_tests(ctx, APPNAME, 'bad') -- cgit v1.2.1