diff options
-rw-r--r-- | src/reader.c | 16 | ||||
-rw-r--r-- | src/writer.c | 8 | ||||
-rw-r--r-- | tests/UTF-8.ttl | 5 |
3 files changed, 2 insertions, 27 deletions
diff --git a/src/reader.c b/src/reader.c index 773dd99b..3a4ef452 100644 --- a/src/reader.c +++ b/src/reader.c @@ -349,10 +349,6 @@ read_hex_escape(SerdReader reader, unsigned length, Ref dest) size = 3; } else if (c < 0x00200000) { size = 4; - } else if (c < 0x04000000) { - size = 5; - } else if (c < 0x80000000) { - size = 6; } else { return false; } @@ -360,14 +356,6 @@ read_hex_escape(SerdReader reader, unsigned length, Ref dest) // Build output in buf // (Note # of bytes = # of leading 1 bits in first byte) switch (size) { - case 6: - buf[5] = 0x80 | (uint8_t)(c & 0x3F); - c >>= 6; - c |= (4 << 24); // set bit 2 - case 5: - buf[4] = 0x80 | (uint8_t)(c & 0x3F); - c >>= 6; - c |= (8 << 18); // set bit 3 case 4: buf[3] = 0x80 | (uint8_t)(c & 0x3F); c >>= 6; @@ -488,10 +476,6 @@ read_character(SerdReader reader, Ref dest) size = 3; } else if ((c & 0xF8) == 0xF0) { // Starts with `11110' size = 4; - } else if ((c & 0xFC) == 0xF8) { // Starts with `111110' - size = 5; - } else if ((c & 0xFE) == 0xFC) { // Starts with `1111110' - size = 6; } else { error(reader, "invalid character\n"); return SERD_ERROR; diff --git a/src/writer.c b/src/writer.c index 1de08a4c..d7d38baa 100644 --- a/src/writer.c +++ b/src/writer.c @@ -101,12 +101,6 @@ write_text(SerdWriter writer, TextContext ctx, } else if ((in & 0xF8) == 0xF0) { // Starts with `11110' size = 4; c = in & 0x07; - } else if ((in & 0xFC) == 0xF8) { // Starts with `111110' - size = 5; - c = in & 0x03; - } else if ((in & 0xFE) == 0xFC) { // Starts with `1111110' - size = 6; - c = in & 0x01; } else { fprintf(stderr, "invalid UTF-8 at offset %zu: %X\n", i, in); return false; @@ -128,8 +122,6 @@ write_text(SerdWriter writer, TextContext ctx, } while (0) switch (size) { - case 6: READ_BYTE(); - case 5: READ_BYTE(); case 4: READ_BYTE(); case 3: READ_BYTE(); case 2: READ_BYTE(); diff --git a/tests/UTF-8.ttl b/tests/UTF-8.ttl index 66be456e..9e9a7b33 100644 --- a/tests/UTF-8.ttl +++ b/tests/UTF-8.ttl @@ -214,7 +214,6 @@ Box drawing alignment tests: █ ▝▀▘▙▄▟ """ . <> rdfs:comment """ - 5 byte character: \U3FFFFFF - 6 byte character: \U7FFFFFFF + Two byte Unicode escape: \U00E0 + Largest Unicode escape in Turtle: \U0010FFFF """ . - |