diff options
author | David Robillard <d@drobilla.net> | 2023-02-27 12:31:33 -0500 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:08 -0500 |
commit | ba897eee8f9a55ab31e338a036fe924bc90be5ef (patch) | |
tree | 8b0fabc5939617e6210961021b919f424b43ea85 | |
parent | 8fb8b922fb7575d7aed2f2184a02586b56ba873a (diff) | |
download | serd-ba897eee8f9a55ab31e338a036fe924bc90be5ef.tar.gz serd-ba897eee8f9a55ab31e338a036fe924bc90be5ef.tar.bz2 serd-ba897eee8f9a55ab31e338a036fe924bc90be5ef.zip |
[WIP] Improve read_IRIREF_suffix() performance
-rw-r--r-- | src/read_ntriples.c | 61 |
1 files changed, 23 insertions, 38 deletions
diff --git a/src/read_ntriples.c b/src/read_ntriples.c index c431574a..9519a6d1 100644 --- a/src/read_ntriples.c +++ b/src/read_ntriples.c @@ -179,6 +179,13 @@ read_pct_encoded(SerdReader* const reader, SerdNode* const node) } static SerdStatus +r_iri_char_err(SerdReader* const reader, const uint32_t code) +{ + return r_err( + reader, SERD_BAD_SYNTAX, "U+%04X is not a valid IRI character", code); +} + +static SerdStatus read_IRIREF_suffix(SerdReader* const reader, SerdNode* const node) { SerdStatus st = SERD_SUCCESS; @@ -188,50 +195,28 @@ read_IRIREF_suffix(SerdReader* const reader, SerdNode* const node) const int c = peek_byte(reader); skip_byte(reader, c); - switch (c) { - case ' ': - case '"': - case '<': - case '^': - case '`': - case '{': - case '|': - case '}': - return r_err( - reader, SERD_BAD_SYNTAX, "'%c' is not a valid IRI character", c); - - case '>': + if (c == '>') { return SERD_SUCCESS; + } - case '%': - st = (reader->flags & SERD_READ_DECODED) ? read_pct_encoded(reader, node) - : push_byte(reader, node, c); - break; - - case '\\': + if (c >= 0x80) { + st = read_utf8_continuation(reader, node, (uint8_t)c); + } else if (c == '%' && (reader->flags & SERD_READ_DECODED)) { + st = read_pct_encoded(reader, node); + } else if (c == '\\') { if (!(st = read_UCHAR(reader, node, &code)) && (code == ' ' || code == '<' || code == '>')) { - return r_err( - reader, SERD_BAD_SYNTAX, "U+%04X is not a valid IRI character", code); + st = r_iri_char_err(reader, code); } - break; - - default: - if (c >= 0x80) { - st = read_utf8_continuation(reader, node, (uint8_t)c); - } else if (c > 0x20) { + } else if (c > 0x20 && c != '"' && c != '<' && c != '^' && c != '`' && + c != '{' && c != '|' && c != '}') { + st = push_byte(reader, node, c); + } else if (c < 0) { + st = r_err(reader, SERD_BAD_SYNTAX, "unexpected end of file"); + } else { + st = r_iri_char_err(reader, (uint32_t)c); + if (!reader->strict) { st = push_byte(reader, node, c); - } else if (c < 0) { - st = r_err(reader, SERD_BAD_SYNTAX, "unexpected end of file"); - } else { - st = r_err(reader, - SERD_BAD_SYNTAX, - "control character U+%04X is not a valid IRI character", - (uint32_t)c); - - if (!reader->strict) { - st = push_byte(reader, node, c); - } } } } |