aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2024-06-25 15:54:30 -0400
committer David Robillard <d@drobilla.net> 2024-06-25 16:51:38 -0400
commit d0c6099462d40a63399d02c2a84237d7fd2f89f8 (patch)
tree 9aa496ddd58f6842cc102f2fcfe3896b6f85ca00
parent 51cc405d288a078543523efd380ce3c7d11ac501 (diff)
download serd-d0c6099462d40a63399d02c2a84237d7fd2f89f8.tar.gz
serd-d0c6099462d40a63399d02c2a84237d7fd2f89f8.tar.bz2
serd-d0c6099462d40a63399d02c2a84237d7fd2f89f8.zip
Reduce size of character classification code
-rw-r--r-- src/n3.c 53
-rw-r--r-- src/node.c 32
-rw-r--r-- src/writer.c 60
3 files changed, 24 insertions, 121 deletions
diff --git a/src/n3.c b/src/n3.c
index 64aa8d7b..073f84d4 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -56,14 +56,11 @@ read_UCHAR(SerdReader* const reader, const Ref dest, uint32_t* const char_code)
{
const int b = peek_byte(reader);
unsigned length = 0;
- switch (b) {
- case 'U':
+ if (b == 'U') {
length = 8;
- break;
- case 'u':
+ } else if (b == 'u') {
length = 4;
- break;
- default:
+ } else {
return SERD_ERR_BAD_SYNTAX;
}
@@ -239,18 +236,12 @@ read_character(SerdReader* const reader,
const uint8_t c)
{
if (!(c & 0x80)) {
- switch (c) {
- case 0xA:
- case 0xD:
+ if (c == 0xA || c == 0xD) {
*flags |= SERD_HAS_NEWLINE;
- break;
- case '"':
- case '\'':
+ } else if (c == '"' || c == '\'') {
*flags |= SERD_HAS_QUOTE;
- break;
- default:
- break;
}
+
return push_byte(reader, dest, c);
}
@@ -537,34 +528,10 @@ read_PN_LOCAL_ESC(SerdReader* const reader, const Ref dest)
skip_byte(reader, '\\');
const int c = peek_byte(reader);
- switch (c) {
- case '!':
- case '#':
- case '$':
- case '%':
- case '&':
- case '\'':
- case '(':
- case ')':
- case '*':
- case '+':
- case ',':
- case '-':
- case '.':
- case '/':
- case ';':
- case '=':
- case '?':
- case '@':
- case '_':
- case '~':
- push_byte(reader, dest, eat_byte_safe(reader, c));
- break;
- default:
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n");
- }
-
- return SERD_SUCCESS;
+ return ((c == '!') || in_range(c, '#', '/') || (c == ';') || (c == '=') ||
+ (c == '?') || (c == '@') || (c == '_') || (c == '~'))
+ ? push_byte(reader, dest, eat_byte_safe(reader, c))
+ : r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n");
}
static SerdStatus
diff --git a/src/node.c b/src/node.c
index 6e0ba486..e435093e 100644
--- a/src/node.c
+++ b/src/node.c
@@ -142,37 +142,7 @@ serd_node_new_uri_from_string(const uint8_t* const str,
static bool
is_uri_path_char(const uint8_t c)
{
- if (is_alpha(c) || is_digit(c)) {
- return true;
- }
-
- switch (c) {
- // unreserved:
- case '-':
- case '.':
- case '_':
- case '~':
- case ':':
-
- case '@': // pchar
- case '/': // separator
-
- // sub-delimiters:
- case '!':
- case '$':
- case '&':
- case '\'':
- case '(':
- case ')':
- case '*':
- case '+':
- case ',':
- case ';':
- case '=':
- return true;
- default:
- return false;
- }
+ return is_alpha(c) || is_digit(c) || strchr("!$&\'()*+,-./:;=@_~", c);
}
static bool
diff --git a/src/writer.c b/src/writer.c
index 08192bb1..e4ef5651 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -283,21 +283,8 @@ write_character(SerdWriter* writer,
/* Return true if `c` is one of " < > \ ^ ` { | } or lies outside 0x21..0x7E
   (assuming in_range() is inclusive at both ends -- confirm against its
   definition).  The removed switch listed ' ' explicitly and tested
   0x20..0x7E; the added expression covers space by raising the lower bound
   to 0x21, and covers { | } with the single range '{'..'}'. */
SERD_NODISCARD static bool
uri_must_escape(const uint8_t c)
{
- switch (c) {
- case ' ':
- case '"':
- case '<':
- case '>':
- case '\\':
- case '^':
- case '`':
- case '{':
- case '|':
- case '}':
- return true;
- default:
- return !in_range(c, 0x20, 0x7E);
- }
+ return (c == '"') || (c == '<') || (c == '>') || (c == '\\') || (c == '^') ||
+ (c == '`') || in_range(c, '{', '}') || !in_range(c, 0x21, 0x7E);
}
static size_t
@@ -368,38 +355,17 @@ write_uri_from_node(SerdWriter* writer, const SerdNode* node)
static bool
lname_must_escape(const uint8_t c)
{
- /* This arbitrary list of characters, most of which have nothing to do with
- Turtle, must be handled as special cases here because the RDF and SPARQL
- WGs are apparently intent on making the once elegant Turtle a baroque
- and inconsistent mess, throwing elegance and extensibility completely
- out the window for no good reason.
-
- Note '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid unescaped
- in local names, so they are not escaped here. */
-
- switch (c) {
- case '\'':
- case '!':
- case '#':
- case '$':
- case '%':
- case '&':
- case '(':
- case ')':
- case '*':
- case '+':
- case ',':
- case '/':
- case ';':
- case '=':
- case '?':
- case '@':
- case '~':
- return true;
- default:
- break;
- }
- return false;
+ /* Return true if `c` is one of ! / ; = ? @ ~ or falls in the range
+ '#'..',' (# $ % & ' ( ) * + ,), which is the same set of characters the
+ removed switch listed.
+
+ Most of these characters have nothing to do with Turtle, but were taken
+ from SPARQL and mashed into the Turtle grammar (despite not being used)
+ with RDF 1.1. So now Turtle is a mess because the SPARQL grammar is
+ poorly designed and did not use a leading character to distinguish
+ constructs such as path patterns, as it should have.
+
+ Note that '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid
+ unescaped in local names, so they are not escaped here. */
+
+ return (c == '!') || (c == '/') || (c == ';') || (c == '=') || (c == '?') ||
+ (c == '@') || (c == '~') || in_range(c, '#', ',');
}
SERD_NODISCARD static SerdStatus