From 0cd0c7356b3670f6ee8ef10c7ae31a9649ce4baa Mon Sep 17 00:00:00 2001 From: David Robillard Date: Mon, 10 Jul 2017 11:23:34 +0200 Subject: Fix hangs when reading corrupt UTF-8 --- NEWS | 3 ++- src/reader.c | 43 +++++++++++++++++----------------- tests/bad/bad-char-in-uri.ttl | 1 + tests/bad/bad-long-literal-in-list.ttl | 1 + 4 files changed, 25 insertions(+), 23 deletions(-) create mode 100644 tests/bad/bad-char-in-uri.ttl create mode 100644 tests/bad/bad-long-literal-in-list.ttl diff --git a/NEWS b/NEWS index c5825c77..5b7c5038 100644 --- a/NEWS +++ b/NEWS @@ -5,8 +5,9 @@ serd (0.27.2) unstable; * Fix strict parsing of abolute URI schemes * Fix parsing of hex escapes in file URIs (thanks Johannes Mueller) * Gracefully handle applications that write corrupt UTF-8 + * Fix hangs when reading corrupt UTF-8 - -- David Robillard Sun, 09 Jul 2017 20:43:13 +0200 + -- David Robillard Mon, 10 Jul 2017 11:23:25 +0200 serd (0.26.0) stable; diff --git a/src/reader.c b/src/reader.c index d3a3336e..c69e59cc 100644 --- a/src/reader.c +++ b/src/reader.c @@ -403,7 +403,7 @@ bad_char(SerdReader* reader, Ref dest, const char* fmt, uint8_t c) b = peek_byte(reader); } - return SERD_SUCCESS; + return SERD_FAILURE; } static SerdStatus @@ -511,7 +511,7 @@ static Ref read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) { Ref ref = push_node(reader, SERD_LITERAL, "", 0); - while (true) { + while (!reader->status) { const uint8_t c = peek_byte(reader); uint32_t code; switch (c) { @@ -550,7 +550,7 @@ static Ref read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) { Ref ref = push_node(reader, SERD_LITERAL, "", 0); - while (true) { + while (!reader->status) { const uint8_t c = peek_byte(reader); uint32_t code = 0; switch (c) { @@ -799,18 +799,16 @@ read_IRIREF(SerdReader* reader) } uint32_t code = 0; - while (true) { - const uint8_t c = peek_byte(reader); + while (!reader->status) { + const uint8_t c = eat_byte_safe(reader, peek_byte(reader)); 
switch (c) { case '"': case '<': case '^': case '`': case '{': case '|': case '}': r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character `%c'\n", c); return pop_node(reader, ref); case '>': - eat_byte_safe(reader, c); return ref; case '\\': - eat_byte_safe(reader, c); if (!read_UCHAR(reader, ref, &code)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n"); return pop_node(reader, ref); @@ -834,12 +832,19 @@ read_IRIREF(SerdReader* reader) if (reader->strict) { return pop_node(reader, ref); } - push_byte(reader, ref, eat_byte_safe(reader, c)); - } else { - push_byte(reader, ref, eat_byte_safe(reader, c)); + reader->status = SERD_FAILURE; + push_byte(reader, ref, c); + } else if (!(c & 0x80)) { + push_byte(reader, ref, c); + } else if (read_utf8_character(reader, ref, c)) { + if (reader->strict) { + return pop_node(reader, ref); + } + reader->status = SERD_FAILURE; } } } + return pop_node(reader, ref); } static bool @@ -1254,18 +1259,16 @@ read_objectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) static bool read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) { - uint8_t c; - while (true) { - TRY_THROW(read_verb(reader, &ctx.predicate)); - read_ws_star(reader); - - TRY_THROW(read_objectList(reader, ctx, ate_dot)); + while (read_verb(reader, &ctx.predicate) && + read_ws_star(reader) && + read_objectList(reader, ctx, ate_dot)) { ctx.predicate = pop_node(reader, ctx.predicate); if (*ate_dot) { return true; } - bool ate_semi = false; + bool ate_semi = false; + uint8_t c; do { read_ws_star(reader); switch (c = peek_byte(reader)) { @@ -1284,11 +1287,7 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) } } - pop_node(reader, ctx.predicate); - return true; -except: - pop_node(reader, ctx.predicate); - return false; + return pop_node(reader, ctx.predicate); } static bool diff --git a/tests/bad/bad-char-in-uri.ttl b/tests/bad/bad-char-in-uri.ttl new file mode 100644 index 00000000..49f9c0d4 --- 
/dev/null +++ b/tests/bad/bad-char-in-uri.ttl @@ -0,0 +1 @@ +<ÿÿÿ://a.example/s> "\u0006!#[]\u007F" . diff --git a/tests/bad/bad-long-literal-in-list.ttl b/tests/bad/bad-long-literal-in-list.ttl new file mode 100644 index 00000000..f10b4c3d --- /dev/null +++ b/tests/bad/bad-long-literal-in-list.ttl @@ -0,0 +1 @@ +<> ("""") . \ No newline at end of file -- cgit v1.2.1