aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2021-07-31 16:43:50 -0400
committerDavid Robillard <d@drobilla.net>2022-01-28 21:57:07 -0500
commit2bb0250be8297cc950d0036915ecdf61ab6f3700 (patch)
treece8051b08017ac54c3195b529647b8b6422e9639
parent155fceabe7070b6610d577734734d038d097b088 (diff)
downloadserd-2bb0250be8297cc950d0036915ecdf61ab6f3700.tar.gz
serd-2bb0250be8297cc950d0036915ecdf61ab6f3700.tar.bz2
serd-2bb0250be8297cc950d0036915ecdf61ab6f3700.zip
Factor out and expose prefixed name predicates
Towards using these in the writer to escape names more precisely.
-rw-r--r--src/n3.c33
-rw-r--r--src/ntriples.h36
-rw-r--r--src/read_ntriples.c5
-rw-r--r--src/read_ntriples.h26
-rw-r--r--src/turtle.h45
5 files changed, 90 insertions, 55 deletions
diff --git a/src/n3.c b/src/n3.c
index 79a48a93..08c7754d 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -19,11 +19,13 @@
#include "env.h"
#include "namespaces.h"
#include "node.h"
+#include "ntriples.h"
#include "read_ntriples.h"
#include "reader.h"
#include "stack.h"
#include "string_utils.h"
#include "try.h"
+#include "turtle.h"
#include "serd/serd.h"
@@ -185,33 +187,10 @@ read_PN_LOCAL_ESC(SerdReader* const reader, SerdNode* const dest)
eat_byte_safe(reader, '\\');
const int c = peek_byte(reader);
- switch (c) {
- case '!':
- case '#':
- case '$':
- case '%':
- case '&':
- case '\'':
- case '(':
- case ')':
- case '*':
- case '+':
- case ',':
- case '-':
- case '.':
- case '/':
- case ';':
- case '=':
- case '?':
- case '@':
- case '_':
- case '~':
- return push_byte(reader, dest, eat_byte_safe(reader, c));
- default:
- break;
- }
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape");
+ return is_PN_LOCAL_ESC(c)
+ ? push_byte(reader, dest, eat_byte_safe(reader, c))
+ : r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape");
}
static SerdStatus
@@ -611,7 +590,7 @@ read_verb(SerdReader* reader, SerdNode** dest)
SerdNode* node = *dest;
const int next = peek_byte(reader);
if (node->length == 1 && serd_node_string(node)[0] == 'a' && next != ':' &&
- !is_PN_CHARS_BASE((uint32_t)next)) {
+ !is_PN_CHARS_BASE(next)) {
serd_stack_pop_to(&reader->stack, orig_stack_size);
return ((*dest = push_node(reader, SERD_URI, NS_RDF "type", 47))
? SERD_SUCCESS
diff --git a/src/ntriples.h b/src/ntriples.h
new file mode 100644
index 00000000..566a8ddc
--- /dev/null
+++ b/src/ntriples.h
@@ -0,0 +1,36 @@
+/*
+ Copyright 2011-2021 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef SERD_NTRIPLES_H
+#define SERD_NTRIPLES_H
+
+#include "string_utils.h"
+
+#include <stdbool.h>
+
+/**
+   Return true if `c` is a PN_CHARS_BASE character.
+
+   A character qualifies if is_alpha() accepts it, or if it falls within one
+   of the code point ranges tested below.  The ranges are tested in ascending
+   order.
+*/
+static inline bool
+is_PN_CHARS_BASE(const int c)
+{
+  // Letters accepted by is_alpha()
+  if (is_alpha(c)) {
+    return true;
+  }
+
+  // U+00C0..U+1FFF, with gaps at U+00D7, U+00F7, U+0300..U+036F, and U+037E
+  if (in_range(c, 0x000C0u, 0x000D6u) || in_range(c, 0x000D8u, 0x000F6u) ||
+      in_range(c, 0x000F8u, 0x002FFu) || in_range(c, 0x00370u, 0x0037Du) ||
+      in_range(c, 0x0037Fu, 0x01FFFu)) {
+    return true;
+  }
+
+  // Selected ranges between U+200C and U+FFFD
+  if (in_range(c, 0x0200Cu, 0x0200Du) || in_range(c, 0x02070u, 0x0218Fu) ||
+      in_range(c, 0x02C00u, 0x02FEFu) || in_range(c, 0x03001u, 0x0D7FFu) ||
+      in_range(c, 0x0F900u, 0x0FDCFu) || in_range(c, 0x0FDF0u, 0x0FFFDu)) {
+    return true;
+  }
+
+  // Everything from U+10000 through U+EFFFF
+  return in_range(c, 0x10000u, 0xEFFFFu);
+}
+
+#endif // SERD_NTRIPLES_H
diff --git a/src/read_ntriples.c b/src/read_ntriples.c
index 017c4dcf..0d5dba2a 100644
--- a/src/read_ntriples.c
+++ b/src/read_ntriples.c
@@ -19,6 +19,7 @@
#include "byte_source.h"
#include "caret.h"
#include "node.h"
+#include "ntriples.h"
#include "read_utf8.h"
#include "reader.h"
#include "stack.h"
@@ -453,7 +454,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, SerdNode* const dest)
return st;
}
- if (!is_PN_CHARS_BASE(code)) {
+ if (!is_PN_CHARS_BASE((int)code)) {
r_err(reader,
SERD_ERR_BAD_SYNTAX,
"U+%04X is not a valid name character",
@@ -507,7 +508,7 @@ read_PN_CHARS(SerdReader* const reader, SerdNode* const dest)
return st;
}
- if (!is_PN_CHARS_BASE(code) && code != 0xB7 &&
+ if (!is_PN_CHARS_BASE((int)code) && code != 0xB7 &&
!(code >= 0x0300 && code <= 0x036F) &&
!(code >= 0x203F && code <= 0x2040)) {
return r_err(reader,
diff --git a/src/read_ntriples.h b/src/read_ntriples.h
index a2f6d232..d8577130 100644
--- a/src/read_ntriples.h
+++ b/src/read_ntriples.h
@@ -22,32 +22,6 @@
#include <stdbool.h>
#include <stdint.h>
-// Utilities
-
-static inline bool
-codepoint_in_range(const uint32_t c, const uint32_t min, const uint32_t max)
-{
- return c >= min && c <= max;
-}
-
-static inline bool
-is_PN_CHARS_BASE(const uint32_t c)
-{
- return (codepoint_in_range(c, 'A', 'Z') || codepoint_in_range(c, 'a', 'z') ||
- codepoint_in_range(c, 0x000C0u, 0x000D6u) ||
- codepoint_in_range(c, 0x000D8u, 0x000F6u) ||
- codepoint_in_range(c, 0x000F8u, 0x002FFu) ||
- codepoint_in_range(c, 0x00370u, 0x0037Du) ||
- codepoint_in_range(c, 0x0037Fu, 0x01FFFu) ||
- codepoint_in_range(c, 0x0200Cu, 0x0200Du) ||
- codepoint_in_range(c, 0x02070u, 0x0218Fu) ||
- codepoint_in_range(c, 0x02C00u, 0x02FEFu) ||
- codepoint_in_range(c, 0x03001u, 0x0D7FFu) ||
- codepoint_in_range(c, 0x0F900u, 0x0FDCFu) ||
- codepoint_in_range(c, 0x0FDF0u, 0x0FFFDu) ||
- codepoint_in_range(c, 0x10000u, 0xEFFFFu));
-}
-
/**
Read one (possibly multi-byte) character.
diff --git a/src/turtle.h b/src/turtle.h
new file mode 100644
index 00000000..c521713e
--- /dev/null
+++ b/src/turtle.h
@@ -0,0 +1,45 @@
+/*
+ Copyright 2011-2020 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef SERD_TURTLE_H
+#define SERD_TURTLE_H
+
+#include "ntriples.h"
+#include "string_utils.h"
+
+#include <stdbool.h>
+#include <string.h>
+
+/**
+   Return true if `c` is a PN_CHARS_U character.
+
+   This is an underscore, or any character accepted by is_PN_CHARS_BASE().
+*/
+static inline bool
+is_PN_CHARS_U(const int c)
+{
+  if (c == '_') {
+    return true;
+  }
+
+  return is_PN_CHARS_BASE(c);
+}
+
+/**
+   Return true if `c` is a PN_CHARS character.
+
+   This is anything accepted by is_PN_CHARS_U(), a hyphen, a digit from '0'
+   to '9', U+00B7, or a code point in U+0300..U+036F or U+203F..U+2040.
+*/
+static inline bool
+is_PN_CHARS(const int c)
+{
+  // Underscore and base name characters
+  if (is_PN_CHARS_U(c)) {
+    return true;
+  }
+
+  // Hyphen, decimal digits, and U+00B7
+  if (c == '-' || in_range(c, '0', '9') || c == 0xB7) {
+    return true;
+  }
+
+  // The two remaining code point ranges
+  const bool in_first_range  = (c >= 0x0300 && c <= 0x036F);
+  const bool in_second_range = (c >= 0x203F && c <= 0x2040);
+
+  return in_first_range || in_second_range;
+}
+
+/**
+   Return true if `c` may follow a backslash in a PN_LOCAL_ESC escape.
+
+   The accepted characters are exactly: ! # $ % & ' ( ) * + , - . / ; = ? @ _ ~
+
+   @param c Character to test.  Any value outside the non-NUL ASCII range
+   (including zero and negative values) is rejected.
+*/
+static inline bool
+is_PN_LOCAL_ESC(const int c)
+{
+  // strchr() converts its argument to char and treats the terminating null as
+  // part of the string, so without the range guard, 0 would match the
+  // terminator and values like 0x121 would alias '!'.
+  return c > 0 && c < 0x80 && strchr("!#$%&\'()*+,-./;=?@_~", c) != NULL;
+}
+
+#endif // SERD_TURTLE_H