about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/reader.c 16
-rw-r--r-- src/writer.c 8
-rw-r--r-- tests/UTF-8.ttl 5
3 files changed, 2 insertions, 27 deletions
diff --git a/src/reader.c b/src/reader.c
index 773dd99b..3a4ef452 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -349,10 +349,6 @@ read_hex_escape(SerdReader reader, unsigned length, Ref dest)
size = 3;
} else if (c < 0x00200000) {
size = 4;
- } else if (c < 0x04000000) {
- size = 5;
- } else if (c < 0x80000000) {
- size = 6;
} else {
return false;
}
@@ -360,14 +356,6 @@ read_hex_escape(SerdReader reader, unsigned length, Ref dest)
// Build output in buf
// (Note # of bytes = # of leading 1 bits in first byte)
switch (size) {
- case 6:
- buf[5] = 0x80 | (uint8_t)(c & 0x3F);
- c >>= 6;
- c |= (4 << 24); // set bit 2
- case 5:
- buf[4] = 0x80 | (uint8_t)(c & 0x3F);
- c >>= 6;
- c |= (8 << 18); // set bit 3
case 4:
buf[3] = 0x80 | (uint8_t)(c & 0x3F);
c >>= 6;
@@ -488,10 +476,6 @@ read_character(SerdReader reader, Ref dest)
size = 3;
} else if ((c & 0xF8) == 0xF0) { // Starts with `11110'
size = 4;
- } else if ((c & 0xFC) == 0xF8) { // Starts with `111110'
- size = 5;
- } else if ((c & 0xFE) == 0xFC) { // Starts with `1111110'
- size = 6;
} else {
error(reader, "invalid character\n");
return SERD_ERROR;
diff --git a/src/writer.c b/src/writer.c
index 1de08a4c..d7d38baa 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -101,12 +101,6 @@ write_text(SerdWriter writer, TextContext ctx,
} else if ((in & 0xF8) == 0xF0) { // Starts with `11110'
size = 4;
c = in & 0x07;
- } else if ((in & 0xFC) == 0xF8) { // Starts with `111110'
- size = 5;
- c = in & 0x03;
- } else if ((in & 0xFE) == 0xFC) { // Starts with `1111110'
- size = 6;
- c = in & 0x01;
} else {
fprintf(stderr, "invalid UTF-8 at offset %zu: %X\n", i, in);
return false;
@@ -128,8 +122,6 @@ write_text(SerdWriter writer, TextContext ctx,
} while (0)
switch (size) {
- case 6: READ_BYTE();
- case 5: READ_BYTE();
case 4: READ_BYTE();
case 3: READ_BYTE();
case 2: READ_BYTE();
diff --git a/tests/UTF-8.ttl b/tests/UTF-8.ttl
index 66be456e..9e9a7b33 100644
--- a/tests/UTF-8.ttl
+++ b/tests/UTF-8.ttl
@@ -214,7 +214,6 @@ Box drawing alignment tests: █
▝▀▘▙▄▟
""" .
<> rdfs:comment """
- 5 byte character: \U3FFFFFF
- 6 byte character: \U7FFFFFFF
+ Two byte Unicode escape: \U00E0
+ Largest Unicode escape in Turtle: \U0010FFFF
""" .
-