diff options
author | David Robillard <d@drobilla.net> | 2024-06-25 15:54:30 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2024-06-25 16:51:38 -0400 |
commit | d0c6099462d40a63399d02c2a84237d7fd2f89f8 (patch) | |
tree | 9aa496ddd58f6842cc102f2fcfe3896b6f85ca00 | |
parent | 51cc405d288a078543523efd380ce3c7d11ac501 (diff) | |
download | serd-d0c6099462d40a63399d02c2a84237d7fd2f89f8.tar.gz serd-d0c6099462d40a63399d02c2a84237d7fd2f89f8.tar.bz2 serd-d0c6099462d40a63399d02c2a84237d7fd2f89f8.zip |
Reduce size of character classification code
-rw-r--r-- | src/n3.c | 53 | ||||
-rw-r--r-- | src/node.c | 32 | ||||
-rw-r--r-- | src/writer.c | 60 |
3 files changed, 24 insertions, 121 deletions
@@ -56,14 +56,11 @@ read_UCHAR(SerdReader* const reader, const Ref dest, uint32_t* const char_code)
 {
   const int b = peek_byte(reader);
   unsigned length = 0;
-  switch (b) {
-  case 'U':
+  if (b == 'U') {
     length = 8;
-    break;
-  case 'u':
+  } else if (b == 'u') {
     length = 4;
-    break;
-  default:
+  } else {
     return SERD_ERR_BAD_SYNTAX;
   }
 
@@ -239,18 +236,12 @@ read_character(SerdReader* const reader,
                const uint8_t c)
 {
   if (!(c & 0x80)) {
-    switch (c) {
-    case 0xA:
-    case 0xD:
+    if (c == 0xA || c == 0xD) {
       *flags |= SERD_HAS_NEWLINE;
-      break;
-    case '"':
-    case '\'':
+    } else if (c == '"' || c == '\'') {
       *flags |= SERD_HAS_QUOTE;
-      break;
-    default:
-      break;
     }
+
     return push_byte(reader, dest, c);
   }
 
@@ -537,34 +528,10 @@ read_PN_LOCAL_ESC(SerdReader* const reader, const Ref dest)
   skip_byte(reader, '\\');
 
   const int c = peek_byte(reader);
-  switch (c) {
-  case '!':
-  case '#':
-  case '$':
-  case '%':
-  case '&':
-  case '\'':
-  case '(':
-  case ')':
-  case '*':
-  case '+':
-  case ',':
-  case '-':
-  case '.':
-  case '/':
-  case ';':
-  case '=':
-  case '?':
-  case '@':
-  case '_':
-  case '~':
-    push_byte(reader, dest, eat_byte_safe(reader, c));
-    break;
-  default:
-    return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n");
-  }
-
-  return SERD_SUCCESS;
+  return ((c == '!') || in_range(c, '#', '/') || (c == ';') || (c == '=') ||
+          (c == '?') || (c == '@') || (c == '_') || (c == '~'))
+           ? push_byte(reader, dest, eat_byte_safe(reader, c))
+           : r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n");
 }
 
 static SerdStatus
@@ -142,37 +142,7 @@ serd_node_new_uri_from_string(const uint8_t* const str,
 static bool
 is_uri_path_char(const uint8_t c)
 {
-  if (is_alpha(c) || is_digit(c)) {
-    return true;
-  }
-
-  switch (c) {
-  // unreserved:
-  case '-':
-  case '.':
-  case '_':
-  case '~':
-  case ':':
-
-  case '@': // pchar
-  case '/': // separator
-
-  // sub-delimiters:
-  case '!':
-  case '$':
-  case '&':
-  case '\'':
-  case '(':
-  case ')':
-  case '*':
-  case '+':
-  case ',':
-  case ';':
-  case '=':
-    return true;
-  default:
-    return false;
-  }
+  return is_alpha(c) || is_digit(c) || strchr("!$&\'()*+,-./:;=@_~", c);
 }
 
 static bool
diff --git a/src/writer.c b/src/writer.c
index 08192bb1..e4ef5651 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -283,21 +283,8 @@ write_character(SerdWriter* writer,
 SERD_NODISCARD static bool
 uri_must_escape(const uint8_t c)
 {
-  switch (c) {
-  case ' ':
-  case '"':
-  case '<':
-  case '>':
-  case '\\':
-  case '^':
-  case '`':
-  case '{':
-  case '|':
-  case '}':
-    return true;
-  default:
-    return !in_range(c, 0x20, 0x7E);
-  }
+  return (c == '"') || (c == '<') || (c == '>') || (c == '\\') || (c == '^') ||
+         (c == '`') || in_range(c, '{', '}') || !in_range(c, 0x21, 0x7E);
 }
 
 static size_t
@@ -368,38 +355,17 @@ write_uri_from_node(SerdWriter* writer, const SerdNode* node)
 static bool
 lname_must_escape(const uint8_t c)
 {
-  /* This arbitrary list of characters, most of which have nothing to do with
-     Turtle, must be handled as special cases here because the RDF and SPARQL
-     WGs are apparently intent on making the once elegant Turtle a baroque
-     and inconsistent mess, throwing elegance and extensibility completely
-     out the window for no good reason.
-
-     Note '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid unescaped
-     in local names, so they are not escaped here. */
-
-  switch (c) {
-  case '\'':
-  case '!':
-  case '#':
-  case '$':
-  case '%':
-  case '&':
-  case '(':
-  case ')':
-  case '*':
-  case '+':
-  case ',':
-  case '/':
-  case ';':
-  case '=':
-  case '?':
-  case '@':
-  case '~':
-    return true;
-  default:
-    break;
-  }
-  return false;
+  /* Most of these characters have nothing to do with Turtle, but were taken
+     from SPARQL and mashed into the Turtle grammar (despite not being used)
+     with RDF 1.1. So now Turtle is a mess because the SPARQL grammar is
+     poorly designed and didn't use a leading character to distinguish things
+     like path patterns like it should have.
+
+     Note that '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid
+     unescaped in local names, so they are not escaped here. */
+
+  return (c == '!') || (c == '/') || (c == ';') || (c == '=') || (c == '?') ||
+         (c == '@') || (c == '~') || in_range(c, '#', ',');
 }
 
 SERD_NODISCARD static SerdStatus