diff options
author | David Robillard <d@drobilla.net> | 2021-07-31 16:43:50 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2022-01-28 21:57:07 -0500 |
commit | 2bb0250be8297cc950d0036915ecdf61ab6f3700 (patch) | |
tree | ce8051b08017ac54c3195b529647b8b6422e9639 /src | |
parent | 155fceabe7070b6610d577734734d038d097b088 (diff) | |
download | serd-2bb0250be8297cc950d0036915ecdf61ab6f3700.tar.gz serd-2bb0250be8297cc950d0036915ecdf61ab6f3700.tar.bz2 serd-2bb0250be8297cc950d0036915ecdf61ab6f3700.zip |
Factor out and expose prefixed name predicates
Towards using these in the writer to escape names more precisely.
Diffstat (limited to 'src')
-rw-r--r-- | src/n3.c | 33 | ||||
-rw-r--r-- | src/ntriples.h | 36 | ||||
-rw-r--r-- | src/read_ntriples.c | 5 | ||||
-rw-r--r-- | src/read_ntriples.h | 26 | ||||
-rw-r--r-- | src/turtle.h | 45 |
5 files changed, 90 insertions, 55 deletions
@@ -19,11 +19,13 @@ #include "env.h" #include "namespaces.h" #include "node.h" +#include "ntriples.h" #include "read_ntriples.h" #include "reader.h" #include "stack.h" #include "string_utils.h" #include "try.h" +#include "turtle.h" #include "serd/serd.h" @@ -185,33 +187,10 @@ read_PN_LOCAL_ESC(SerdReader* const reader, SerdNode* const dest) eat_byte_safe(reader, '\\'); const int c = peek_byte(reader); - switch (c) { - case '!': - case '#': - case '$': - case '%': - case '&': - case '\'': - case '(': - case ')': - case '*': - case '+': - case ',': - case '-': - case '.': - case '/': - case ';': - case '=': - case '?': - case '@': - case '_': - case '~': - return push_byte(reader, dest, eat_byte_safe(reader, c)); - default: - break; - } - return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape"); + return is_PN_LOCAL_ESC(c) + ? push_byte(reader, dest, eat_byte_safe(reader, c)) + : r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape"); } static SerdStatus @@ -611,7 +590,7 @@ read_verb(SerdReader* reader, SerdNode** dest) SerdNode* node = *dest; const int next = peek_byte(reader); if (node->length == 1 && serd_node_string(node)[0] == 'a' && next != ':' && - !is_PN_CHARS_BASE((uint32_t)next)) { + !is_PN_CHARS_BASE(next)) { serd_stack_pop_to(&reader->stack, orig_stack_size); return ((*dest = push_node(reader, SERD_URI, NS_RDF "type", 47)) ? SERD_SUCCESS diff --git a/src/ntriples.h b/src/ntriples.h new file mode 100644 index 00000000..566a8ddc --- /dev/null +++ b/src/ntriples.h @@ -0,0 +1,36 @@ +/* + Copyright 2011-2021 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_NTRIPLES_H +#define SERD_NTRIPLES_H + +#include "string_utils.h" + +#include <stdbool.h> + +static inline bool +is_PN_CHARS_BASE(const int c) +{ + return (is_alpha(c) || in_range(c, 0x000C0u, 0x000D6u) || + in_range(c, 0x000D8u, 0x000F6u) || in_range(c, 0x000F8u, 0x002FFu) || + in_range(c, 0x00370u, 0x0037Du) || in_range(c, 0x0037Fu, 0x01FFFu) || + in_range(c, 0x0200Cu, 0x0200Du) || in_range(c, 0x02070u, 0x0218Fu) || + in_range(c, 0x02C00u, 0x02FEFu) || in_range(c, 0x03001u, 0x0D7FFu) || + in_range(c, 0x0F900u, 0x0FDCFu) || in_range(c, 0x0FDF0u, 0x0FFFDu) || + in_range(c, 0x10000u, 0xEFFFFu)); +} + +#endif // SERD_NTRIPLES_H diff --git a/src/read_ntriples.c b/src/read_ntriples.c index 017c4dcf..0d5dba2a 100644 --- a/src/read_ntriples.c +++ b/src/read_ntriples.c @@ -19,6 +19,7 @@ #include "byte_source.h" #include "caret.h" #include "node.h" +#include "ntriples.h" #include "read_utf8.h" #include "reader.h" #include "stack.h" @@ -453,7 +454,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, SerdNode* const dest) return st; } - if (!is_PN_CHARS_BASE(code)) { + if (!is_PN_CHARS_BASE((int)code)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "U+%04X is not a valid name character", @@ -507,7 +508,7 @@ read_PN_CHARS(SerdReader* const reader, SerdNode* const dest) return st; } - if (!is_PN_CHARS_BASE(code) && code != 0xB7 && + if (!is_PN_CHARS_BASE((int)code) && code != 0xB7 && !(code >= 0x0300 && code <= 0x036F) && !(code >= 0x203F && code <= 0x2040)) { return r_err(reader, diff --git a/src/read_ntriples.h b/src/read_ntriples.h index a2f6d232..d8577130 100644 --- a/src/read_ntriples.h +++ b/src/read_ntriples.h @@ -22,32 +22,6 @@ #include <stdbool.h> #include <stdint.h> -// Utilities - -static inline bool -codepoint_in_range(const uint32_t c, const uint32_t min, const uint32_t max) -{ - return c >= min && c <= max; -} - -static inline bool -is_PN_CHARS_BASE(const uint32_t c) -{ - return (codepoint_in_range(c, 'A', 'Z') || codepoint_in_range(c, 'a', 'z') || - codepoint_in_range(c, 0x000C0u, 0x000D6u) || - codepoint_in_range(c, 0x000D8u, 0x000F6u) || - codepoint_in_range(c, 0x000F8u, 0x002FFu) || - codepoint_in_range(c, 0x00370u, 0x0037Du) || - codepoint_in_range(c, 0x0037Fu, 0x01FFFu) || - codepoint_in_range(c, 0x0200Cu, 0x0200Du) || - codepoint_in_range(c, 0x02070u, 0x0218Fu) || - codepoint_in_range(c, 0x02C00u, 0x02FEFu) || - codepoint_in_range(c, 0x03001u, 0x0D7FFu) || - codepoint_in_range(c, 0x0F900u, 0x0FDCFu) || - codepoint_in_range(c, 0x0FDF0u, 0x0FFFDu) || - codepoint_in_range(c, 0x10000u, 0xEFFFFu)); -} - /** Read one (possibly multi-byte) character (possibly multi-byte). diff --git a/src/turtle.h b/src/turtle.h new file mode 100644 index 00000000..c521713e --- /dev/null +++ b/src/turtle.h @@ -0,0 +1,45 @@ +/* + Copyright 2011-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_TURTLE_H +#define SERD_TURTLE_H + +#include "ntriples.h" +#include "string_utils.h" + +#include <stdbool.h> +#include <string.h> + +static inline bool +is_PN_CHARS_U(const int c) +{ + return c == '_' || is_PN_CHARS_BASE(c); +} + +static inline bool +is_PN_CHARS(const int c) +{ + return (is_PN_CHARS_U(c) || c == '-' || in_range(c, '0', '9') || c == 0xB7 || + (c >= 0x0300 && c <= 0x036F) || (c >= 0x203F && c <= 0x2040)); +} + +static inline bool +is_PN_LOCAL_ESC(const int c) +{ + return strchr("!#$%&\'()*+,-./;=?@_~", c) != NULL; +} + +#endif // SERD_TURTLE_H |