aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2017-07-10 11:23:34 +0200
committerDavid Robillard <d@drobilla.net>2017-07-10 12:07:46 +0200
commit0cd0c7356b3670f6ee8ef10c7ae31a9649ce4baa (patch)
tree4162d73ced9a55adc3a2659729081038c6527097
parent5e9b029e9add90981d31e5577f358ec7db3de3ca (diff)
downloadserd-0cd0c7356b3670f6ee8ef10c7ae31a9649ce4baa.tar.gz
serd-0cd0c7356b3670f6ee8ef10c7ae31a9649ce4baa.tar.bz2
serd-0cd0c7356b3670f6ee8ef10c7ae31a9649ce4baa.zip
Fix hangs when reading corrupt UTF-8
-rw-r--r--NEWS3
-rw-r--r--src/reader.c43
-rw-r--r--tests/bad/bad-char-in-uri.ttl1
-rw-r--r--tests/bad/bad-long-literal-in-list.ttl1
4 files changed, 25 insertions, 23 deletions
diff --git a/NEWS b/NEWS
index c5825c77..5b7c5038 100644
--- a/NEWS
+++ b/NEWS
@@ -5,8 +5,9 @@ serd (0.27.2) unstable;
* Fix strict parsing of absolute URI schemes
* Fix parsing of hex escapes in file URIs (thanks Johannes Mueller)
* Gracefully handle applications that write corrupt UTF-8
+ * Fix hangs when reading corrupt UTF-8
- -- David Robillard <d@drobilla.net> Sun, 09 Jul 2017 20:43:13 +0200
+ -- David Robillard <d@drobilla.net> Mon, 10 Jul 2017 11:23:25 +0200
serd (0.26.0) stable;
diff --git a/src/reader.c b/src/reader.c
index d3a3336e..c69e59cc 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -403,7 +403,7 @@ bad_char(SerdReader* reader, Ref dest, const char* fmt, uint8_t c)
b = peek_byte(reader);
}
- return SERD_SUCCESS;
+ return SERD_FAILURE;
}
static SerdStatus
@@ -511,7 +511,7 @@ static Ref
read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
{
Ref ref = push_node(reader, SERD_LITERAL, "", 0);
- while (true) {
+ while (!reader->status) {
const uint8_t c = peek_byte(reader);
uint32_t code;
switch (c) {
@@ -550,7 +550,7 @@ static Ref
read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
{
Ref ref = push_node(reader, SERD_LITERAL, "", 0);
- while (true) {
+ while (!reader->status) {
const uint8_t c = peek_byte(reader);
uint32_t code = 0;
switch (c) {
@@ -799,18 +799,16 @@ read_IRIREF(SerdReader* reader)
}
uint32_t code = 0;
- while (true) {
- const uint8_t c = peek_byte(reader);
+ while (!reader->status) {
+ const uint8_t c = eat_byte_safe(reader, peek_byte(reader));
switch (c) {
case '"': case '<': case '^': case '`': case '{': case '|': case '}':
r_err(reader, SERD_ERR_BAD_SYNTAX,
"invalid IRI character `%c'\n", c);
return pop_node(reader, ref);
case '>':
- eat_byte_safe(reader, c);
return ref;
case '\\':
- eat_byte_safe(reader, c);
if (!read_UCHAR(reader, ref, &code)) {
r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n");
return pop_node(reader, ref);
@@ -834,12 +832,19 @@ read_IRIREF(SerdReader* reader)
if (reader->strict) {
return pop_node(reader, ref);
}
- push_byte(reader, ref, eat_byte_safe(reader, c));
- } else {
- push_byte(reader, ref, eat_byte_safe(reader, c));
+ reader->status = SERD_FAILURE;
+ push_byte(reader, ref, c);
+ } else if (!(c & 0x80)) {
+ push_byte(reader, ref, c);
+ } else if (read_utf8_character(reader, ref, c)) {
+ if (reader->strict) {
+ return pop_node(reader, ref);
+ }
+ reader->status = SERD_FAILURE;
}
}
}
+ return pop_node(reader, ref);
}
static bool
@@ -1254,18 +1259,16 @@ read_objectList(SerdReader* reader, ReadContext ctx, bool* ate_dot)
static bool
read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot)
{
- uint8_t c;
- while (true) {
- TRY_THROW(read_verb(reader, &ctx.predicate));
- read_ws_star(reader);
-
- TRY_THROW(read_objectList(reader, ctx, ate_dot));
+ while (read_verb(reader, &ctx.predicate) &&
+ read_ws_star(reader) &&
+ read_objectList(reader, ctx, ate_dot)) {
ctx.predicate = pop_node(reader, ctx.predicate);
if (*ate_dot) {
return true;
}
- bool ate_semi = false;
+ bool ate_semi = false;
+ uint8_t c;
do {
read_ws_star(reader);
switch (c = peek_byte(reader)) {
@@ -1284,11 +1287,7 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot)
}
}
- pop_node(reader, ctx.predicate);
- return true;
-except:
- pop_node(reader, ctx.predicate);
- return false;
+ return pop_node(reader, ctx.predicate);
}
static bool
diff --git a/tests/bad/bad-char-in-uri.ttl b/tests/bad/bad-char-in-uri.ttl
new file mode 100644
index 00000000..49f9c0d4
--- /dev/null
+++ b/tests/bad/bad-char-in-uri.ttl
@@ -0,0 +1 @@
+<ÿÿÿ://a.example/s> <http://a.eoampl†/p> "\u0006!#[]\u007F" .
diff --git a/tests/bad/bad-long-literal-in-list.ttl b/tests/bad/bad-long-literal-in-list.ttl
new file mode 100644
index 00000000..f10b4c3d
--- /dev/null
+++ b/tests/bad/bad-long-literal-in-list.ttl
@@ -0,0 +1 @@
+<> <http://example.org/pred> ("""") . \ No newline at end of file