diff options
author | David Robillard <d@drobilla.net> | 2021-07-31 16:43:50 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:07 -0500 |
commit | 13892da5556b962d749b42c851b29237e380e36c (patch) | |
tree | 0d2ccb82e86b57055083f069b990b3516ec05a02 | |
parent | 0611f1446c37915708ce0ea337c9e84d4cbc3be4 (diff) | |
download | serd-13892da5556b962d749b42c851b29237e380e36c.tar.gz serd-13892da5556b962d749b42c851b29237e380e36c.tar.bz2 serd-13892da5556b962d749b42c851b29237e380e36c.zip |
Factor out and expose prefixed name predicates
Towards using these in the writer to escape names more precisely.
-rw-r--r-- | src/n3.c | 32 | ||||
-rw-r--r-- | src/ntriples.h | 23 | ||||
-rw-r--r-- | src/read_ntriples.c | 31 | ||||
-rw-r--r-- | src/read_ntriples.h | 8 | ||||
-rw-r--r-- | src/turtle.h | 32 |
5 files changed, 63 insertions, 63 deletions
@@ -4,11 +4,13 @@ #include "byte_source.h" #include "namespaces.h" #include "node.h" +#include "ntriples.h" #include "read_ntriples.h" #include "reader.h" #include "stack.h" #include "string_utils.h" #include "try.h" +#include "turtle.h" #include "serd/node.h" #include "serd/reader.h" @@ -170,33 +172,9 @@ read_PN_LOCAL_ESC(SerdReader* const reader, SerdNode* const dest) skip_byte(reader, '\\'); const int c = peek_byte(reader); - switch (c) { - case '!': - case '#': - case '$': - case '%': - case '&': - case '\'': - case '(': - case ')': - case '*': - case '+': - case ',': - case '-': - case '.': - case '/': - case ';': - case '=': - case '?': - case '@': - case '_': - case '~': - return push_byte(reader, dest, eat_byte_safe(reader, c)); - default: - break; - } - return r_err(reader, SERD_BAD_SYNTAX, "invalid escape"); + return is_PN_LOCAL_ESC(c) ? push_byte(reader, dest, eat_byte_safe(reader, c)) + : r_err(reader, SERD_BAD_SYNTAX, "invalid escape"); } static SerdStatus @@ -492,7 +470,7 @@ read_verb(SerdReader* const reader, SerdNode** const dest) SerdNode* node = *dest; const int next = peek_byte(reader); if (node->length == 1 && serd_node_string(node)[0] == 'a' && next != ':' && - !is_PN_CHARS_BASE((uint32_t)next)) { + !is_PN_CHARS_BASE(next)) { serd_stack_pop_to(&reader->stack, orig_stack_size); return ((*dest = push_node(reader, SERD_URI, NS_RDF "type", 47)) ? SERD_SUCCESS diff --git a/src/ntriples.h b/src/ntriples.h new file mode 100644 index 00000000..404fba71 --- /dev/null +++ b/src/ntriples.h @@ -0,0 +1,23 @@ +// Copyright 2011-2021 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_SRC_NTRIPLES_H +#define SERD_SRC_NTRIPLES_H + +#include "string_utils.h" + +#include <stdbool.h> + +static inline bool +is_PN_CHARS_BASE(const int c) +{ + return (is_alpha(c) || in_range(c, 0x000C0U, 0x000D6U) || + in_range(c, 0x000D8U, 0x000F6U) || in_range(c, 0x000F8U, 0x002FFU) || + in_range(c, 0x00370U, 0x0037DU) || in_range(c, 0x0037FU, 0x01FFFU) || + in_range(c, 0x0200CU, 0x0200DU) || in_range(c, 0x02070U, 0x0218FU) || + in_range(c, 0x02C00U, 0x02FEFU) || in_range(c, 0x03001U, 0x0D7FFU) || + in_range(c, 0x0F900U, 0x0FDCFU) || in_range(c, 0x0FDF0U, 0x0FFFDU) || + in_range(c, 0x10000U, 0xEFFFFU)); +} + +#endif // SERD_SRC_NTRIPLES_H diff --git a/src/read_ntriples.c b/src/read_ntriples.c index 3d3af2ce..96748d64 100644 --- a/src/read_ntriples.c +++ b/src/read_ntriples.c @@ -5,6 +5,7 @@ #include "caret.h" #include "node.h" +#include "ntriples.h" #include "read_utf8.h" #include "reader.h" #include "stack.h" @@ -23,32 +24,6 @@ #include <stdint.h> #include <stdio.h> -// Utilities - -static inline bool -codepoint_in_range(const uint32_t c, const uint32_t min, const uint32_t max) -{ - return c >= min && c <= max; -} - -bool -is_PN_CHARS_BASE(const uint32_t c) -{ - return (codepoint_in_range(c, 'A', 'Z') || codepoint_in_range(c, 'a', 'z') || - codepoint_in_range(c, 0x000C0U, 0x000D6U) || - codepoint_in_range(c, 0x000D8U, 0x000F6U) || - codepoint_in_range(c, 0x000F8U, 0x002FFU) || - codepoint_in_range(c, 0x00370U, 0x0037DU) || - codepoint_in_range(c, 0x0037FU, 0x01FFFU) || - codepoint_in_range(c, 0x0200CU, 0x0200DU) || - codepoint_in_range(c, 0x02070U, 0x0218FU) || - codepoint_in_range(c, 0x02C00U, 0x02FEFU) || - codepoint_in_range(c, 0x03001U, 0x0D7FFU) || - codepoint_in_range(c, 0x0F900U, 0x0FDCFU) || - codepoint_in_range(c, 0x0FDF0U, 0x0FFFDU) || - codepoint_in_range(c, 0x10000U, 0xEFFFFU)); -} - /** Read an initial prefixed name character. @@ -484,7 +459,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, SerdNode* const dest) TRY(st, read_utf8_code_point(reader, dest, &code, (uint8_t)c)); - if (!is_PN_CHARS_BASE(code)) { + if (!is_PN_CHARS_BASE((int)code)) { r_err( reader, SERD_BAD_SYNTAX, "U+%04X is not a valid name character", code); if (reader->strict) { @@ -526,7 +501,7 @@ read_PN_CHARS(SerdReader* const reader, SerdNode* const dest) uint32_t code = 0U; TRY(st, read_utf8_code_point(reader, dest, &code, (uint8_t)c)); - if (!is_PN_CHARS_BASE(code) && code != 0xB7 && + if (!is_PN_CHARS_BASE((int)code) && code != 0xB7 && !(code >= 0x0300 && code <= 0x036F) && !(code >= 0x203F && code <= 0x2040)) { return r_err( diff --git a/src/read_ntriples.h b/src/read_ntriples.h index e6051fde..58daae0c 100644 --- a/src/read_ntriples.h +++ b/src/read_ntriples.h @@ -14,14 +14,6 @@ // Utilities /** - Return true if the codepoint `c` is a valid PN_CHARS_BASE character. - - RDF 1.1 NTriples: [157s] PN_CHARS_BASE -*/ -bool -is_PN_CHARS_BASE(uint32_t c); - -/** Read one (possibly multi-byte) character. The caller must have already eaten the first byte, `c`. diff --git a/src/turtle.h b/src/turtle.h new file mode 100644 index 00000000..6e7e3a8d --- /dev/null +++ b/src/turtle.h @@ -0,0 +1,32 @@ +// Copyright 2011-2020 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_SRC_TURTLE_H +#define SERD_SRC_TURTLE_H + +#include "ntriples.h" +#include "string_utils.h" + +#include <stdbool.h> +#include <string.h> + +static inline bool +is_PN_CHARS_U(const int c) +{ + return c == '_' || is_PN_CHARS_BASE(c); +} + +static inline bool +is_PN_CHARS(const int c) +{ + return (is_PN_CHARS_U(c) || c == '-' || in_range(c, '0', '9') || c == 0xB7 || + (c >= 0x0300 && c <= 0x036F) || (c >= 0x203F && c <= 0x2040)); +} + +static inline bool +is_PN_LOCAL_ESC(const int c) +{ + return strchr("!#$%&\'()*+,-./;=?@_~", c) != NULL; +} + +#endif // SERD_SRC_TURTLE_H |