From ea4b6e7d109ae3abc7f8ecdf99e3eb33e3484b77 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sun, 5 Feb 2023 12:42:52 -0500 Subject: Strengthen handling of corrupt UTF-8 input --- test/extra/lax/test-bad-utf8.ttl | 66 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 6 deletions(-) (limited to 'test/extra/lax/test-bad-utf8.ttl') diff --git a/test/extra/lax/test-bad-utf8.ttl b/test/extra/lax/test-bad-utf8.ttl index 0e177366..e5640078 100644 --- a/test/extra/lax/test-bad-utf8.ttl +++ b/test/extra/lax/test-bad-utf8.ttl @@ -1,6 +1,60 @@ - "Impossible bytes: " . - "2 continuation bytes: " . - "Missing continuation: " . - """Impossible bytes: """ . - """2 continuation bytes: """ . - """Missing continuation: """ . + "The other values of this property should align nicely" . + "Impossible byte 1: |" . + "Impossible byte 2: |" . + "Four impossible bytes: |" . + "First continuation byte: |" . + "Last continuation byte: |" . + "2 continuation bytes: |" . + "3 continuation bytes: |" . + "4 continuation bytes: |" . + "5 continuation bytes: |" . + "6 continuation bytes: |" . + "7 continuation bytes: |" . + "Continuation bytes 1: |" . + "Continuation bytes 2: |" . + "Continuation bytes 3: |" . + "Continuation bytes 4: |" . + "Lonely leading bytes of 2-byte sequences 1: |" . + "Lonely leading bytes of 2-byte sequences 2: |" . + "Lonely leading bytes of 3-byte sequences: |" . + "Lonely leading bytes of 4-byte sequences: |" . + "Lonely leading bytes of 5-byte sequences: |" . + "Lonely leading bytes of 6-byte sequences: |" . + "2-byte sequence with last byte missing (U+0000): |" . + "3-byte sequence with last byte missing (U+0000): |" . + "4-byte sequence with last byte missing (U+0000): |" . + "2-byte sequence with last byte missing (U-000007FF): |" . + "3-byte sequence with last byte missing (U-0000FFFF): |" . + "4-byte sequence with last byte missing (U-001FFFFF): |" . + "6 sequences with last byte missing: |" . + + """ +Impossible byte 1: | +Impossible byte 2: | +Four impossible bytes: | +First continuation byte: | +Last continuation byte: | +2 continuation bytes: | +3 continuation bytes: | +4 continuation bytes: | +5 continuation bytes: | +6 continuation bytes: | +7 continuation bytes: | +Continuation bytes 1: | +Continuation bytes 2: | +Continuation bytes 3: | +Continuation bytes 4: | +Lonely leading bytes of 2-byte sequences 1: | +Lonely leading bytes of 2-byte sequences 2: | +Lonely leading bytes of 3-byte sequences: | +Lonely leading bytes of 4-byte sequences: | +Lonely leading bytes of 5-byte sequences: | +Lonely leading bytes of 6-byte sequences: | +2-byte sequence with last byte missing (U+0000): | +3-byte sequence with last byte missing (U+0000): | +4-byte sequence with last byte missing (U+0000): | +2-byte sequence with last byte missing (U-000007FF): | +3-byte sequence with last byte missing (U-0000FFFF): | +4-byte sequence with last byte missing (U-001FFFFF): | +6 sequences with last byte missing: | +""" . -- cgit v1.2.1