aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2021-07-31 16:43:50 -0400
committerDavid Robillard <d@drobilla.net>2023-12-02 18:49:07 -0500
commit13892da5556b962d749b42c851b29237e380e36c (patch)
tree0d2ccb82e86b57055083f069b990b3516ec05a02
parent0611f1446c37915708ce0ea337c9e84d4cbc3be4 (diff)
downloadserd-13892da5556b962d749b42c851b29237e380e36c.tar.gz
serd-13892da5556b962d749b42c851b29237e380e36c.tar.bz2
serd-13892da5556b962d749b42c851b29237e380e36c.zip
Factor out and expose prefixed name predicates
This is a step towards using these predicates in the writer to escape names more precisely.
-rw-r--r--src/n3.c32
-rw-r--r--src/ntriples.h23
-rw-r--r--src/read_ntriples.c31
-rw-r--r--src/read_ntriples.h8
-rw-r--r--src/turtle.h32
5 files changed, 63 insertions, 63 deletions
diff --git a/src/n3.c b/src/n3.c
index dc133e69..1374ae7c 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -4,11 +4,13 @@
#include "byte_source.h"
#include "namespaces.h"
#include "node.h"
+#include "ntriples.h"
#include "read_ntriples.h"
#include "reader.h"
#include "stack.h"
#include "string_utils.h"
#include "try.h"
+#include "turtle.h"
#include "serd/node.h"
#include "serd/reader.h"
@@ -170,33 +172,9 @@ read_PN_LOCAL_ESC(SerdReader* const reader, SerdNode* const dest)
skip_byte(reader, '\\');
const int c = peek_byte(reader);
- switch (c) {
- case '!':
- case '#':
- case '$':
- case '%':
- case '&':
- case '\'':
- case '(':
- case ')':
- case '*':
- case '+':
- case ',':
- case '-':
- case '.':
- case '/':
- case ';':
- case '=':
- case '?':
- case '@':
- case '_':
- case '~':
- return push_byte(reader, dest, eat_byte_safe(reader, c));
- default:
- break;
- }
- return r_err(reader, SERD_BAD_SYNTAX, "invalid escape");
+ return is_PN_LOCAL_ESC(c) ? push_byte(reader, dest, eat_byte_safe(reader, c))
+ : r_err(reader, SERD_BAD_SYNTAX, "invalid escape");
}
static SerdStatus
@@ -492,7 +470,7 @@ read_verb(SerdReader* const reader, SerdNode** const dest)
SerdNode* node = *dest;
const int next = peek_byte(reader);
if (node->length == 1 && serd_node_string(node)[0] == 'a' && next != ':' &&
- !is_PN_CHARS_BASE((uint32_t)next)) {
+ !is_PN_CHARS_BASE(next)) {
serd_stack_pop_to(&reader->stack, orig_stack_size);
return ((*dest = push_node(reader, SERD_URI, NS_RDF "type", 47))
? SERD_SUCCESS
diff --git a/src/ntriples.h b/src/ntriples.h
new file mode 100644
index 00000000..404fba71
--- /dev/null
+++ b/src/ntriples.h
@@ -0,0 +1,23 @@
+// Copyright 2011-2021 David Robillard <d@drobilla.net>
+// SPDX-License-Identifier: ISC
+
+#ifndef SERD_SRC_NTRIPLES_H
+#define SERD_SRC_NTRIPLES_H
+
+#include "string_utils.h"
+
+#include <stdbool.h>
+
/**
   Return true if the code point `c` is a valid PN_CHARS_BASE character.

   A character qualifies if is_alpha() accepts it, or if it falls in one of
   the non-ASCII code point ranges checked below.

   RDF 1.1 NTriples: [157s] PN_CHARS_BASE
*/
static inline bool
is_PN_CHARS_BASE(const int c)
{
  // Letters accepted by is_alpha()
  if (is_alpha(c)) {
    return true;
  }

  // U+00C0..U+02FF, with gaps at U+00D7 and U+00F7
  if (in_range(c, 0x000C0U, 0x000D6U) || in_range(c, 0x000D8U, 0x000F6U) ||
      in_range(c, 0x000F8U, 0x002FFU)) {
    return true;
  }

  // U+0370..U+1FFF, with a gap at U+037E
  if (in_range(c, 0x00370U, 0x0037DU) || in_range(c, 0x0037FU, 0x01FFFU)) {
    return true;
  }

  // Selected ranges from U+200C up to U+D7FF
  if (in_range(c, 0x0200CU, 0x0200DU) || in_range(c, 0x02070U, 0x0218FU) ||
      in_range(c, 0x02C00U, 0x02FEFU) || in_range(c, 0x03001U, 0x0D7FFU)) {
    return true;
  }

  // U+F900..U+FFFD with a gap at U+FDD0..U+FDEF, then U+10000..U+EFFFF
  return in_range(c, 0x0F900U, 0x0FDCFU) || in_range(c, 0x0FDF0U, 0x0FFFDU) ||
         in_range(c, 0x10000U, 0xEFFFFU);
}
+
+#endif // SERD_SRC_NTRIPLES_H
diff --git a/src/read_ntriples.c b/src/read_ntriples.c
index 3d3af2ce..96748d64 100644
--- a/src/read_ntriples.c
+++ b/src/read_ntriples.c
@@ -5,6 +5,7 @@
#include "caret.h"
#include "node.h"
+#include "ntriples.h"
#include "read_utf8.h"
#include "reader.h"
#include "stack.h"
@@ -23,32 +24,6 @@
#include <stdint.h>
#include <stdio.h>
-// Utilities
-
-static inline bool
-codepoint_in_range(const uint32_t c, const uint32_t min, const uint32_t max)
-{
- return c >= min && c <= max;
-}
-
-bool
-is_PN_CHARS_BASE(const uint32_t c)
-{
- return (codepoint_in_range(c, 'A', 'Z') || codepoint_in_range(c, 'a', 'z') ||
- codepoint_in_range(c, 0x000C0U, 0x000D6U) ||
- codepoint_in_range(c, 0x000D8U, 0x000F6U) ||
- codepoint_in_range(c, 0x000F8U, 0x002FFU) ||
- codepoint_in_range(c, 0x00370U, 0x0037DU) ||
- codepoint_in_range(c, 0x0037FU, 0x01FFFU) ||
- codepoint_in_range(c, 0x0200CU, 0x0200DU) ||
- codepoint_in_range(c, 0x02070U, 0x0218FU) ||
- codepoint_in_range(c, 0x02C00U, 0x02FEFU) ||
- codepoint_in_range(c, 0x03001U, 0x0D7FFU) ||
- codepoint_in_range(c, 0x0F900U, 0x0FDCFU) ||
- codepoint_in_range(c, 0x0FDF0U, 0x0FFFDU) ||
- codepoint_in_range(c, 0x10000U, 0xEFFFFU));
-}
-
/**
Read an initial prefixed name character.
@@ -484,7 +459,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, SerdNode* const dest)
TRY(st, read_utf8_code_point(reader, dest, &code, (uint8_t)c));
- if (!is_PN_CHARS_BASE(code)) {
+ if (!is_PN_CHARS_BASE((int)code)) {
r_err(
reader, SERD_BAD_SYNTAX, "U+%04X is not a valid name character", code);
if (reader->strict) {
@@ -526,7 +501,7 @@ read_PN_CHARS(SerdReader* const reader, SerdNode* const dest)
uint32_t code = 0U;
TRY(st, read_utf8_code_point(reader, dest, &code, (uint8_t)c));
- if (!is_PN_CHARS_BASE(code) && code != 0xB7 &&
+ if (!is_PN_CHARS_BASE((int)code) && code != 0xB7 &&
!(code >= 0x0300 && code <= 0x036F) &&
!(code >= 0x203F && code <= 0x2040)) {
return r_err(
diff --git a/src/read_ntriples.h b/src/read_ntriples.h
index e6051fde..58daae0c 100644
--- a/src/read_ntriples.h
+++ b/src/read_ntriples.h
@@ -14,14 +14,6 @@
// Utilities
/**
- Return true if the codepoint `c` is a valid PN_CHARS_BASE character.
-
- RDF 1.1 NTriples: [157s] PN_CHARS_BASE
-*/
-bool
-is_PN_CHARS_BASE(uint32_t c);
-
-/**
Read one (possibly multi-byte) character.
The caller must have already eaten the first byte, `c`.
diff --git a/src/turtle.h b/src/turtle.h
new file mode 100644
index 00000000..6e7e3a8d
--- /dev/null
+++ b/src/turtle.h
@@ -0,0 +1,32 @@
+// Copyright 2011-2020 David Robillard <d@drobilla.net>
+// SPDX-License-Identifier: ISC
+
+#ifndef SERD_SRC_TURTLE_H
+#define SERD_SRC_TURTLE_H
+
+#include "ntriples.h"
+#include "string_utils.h"
+
+#include <stdbool.h>
+#include <string.h>
+
/**
   Return true if the code point `c` is a valid PN_CHARS_U character.

   This is an underscore, or anything accepted by is_PN_CHARS_BASE().
*/
static inline bool
is_PN_CHARS_U(const int c)
{
  if (c == '_') {
    return true;
  }

  return is_PN_CHARS_BASE(c);
}
+
/**
   Return true if the code point `c` is a valid PN_CHARS character.

   This is anything accepted by is_PN_CHARS_U(), a hyphen, a decimal digit,
   U+00B7, or a code point in U+0300..U+036F or U+203F..U+2040.
*/
static inline bool
is_PN_CHARS(const int c)
{
  // Name start characters, hyphen, and digits
  if (is_PN_CHARS_U(c) || c == '-' || in_range(c, '0', '9')) {
    return true;
  }

  // U+00B7
  if (c == 0xB7) {
    return true;
  }

  // U+0300..U+036F
  if (c >= 0x0300 && c <= 0x036F) {
    return true;
  }

  // U+203F..U+2040
  return c >= 0x203F && c <= 0x2040;
}
+
/**
   Return true if `c` is a character that may follow a backslash in a
   PN_LOCAL_ESC escape, that is, one of: ! # $ % & ' ( ) * + , - . / ; = ? @ _ ~

   Only non-NUL ASCII values are passed to strchr().  This matters because
   strchr() treats the terminating NUL as part of the string (so 0 would be
   reported as a match), and converts its argument to `char` (so values
   outside the ASCII range, such as larger code points or EOF, could alias
   onto a valid escape character).
*/
static inline bool
is_PN_LOCAL_ESC(const int c)
{
  return c > 0 && c < 0x80 && strchr("!#$%&\'()*+,-./;=?@_~", c) != NULL;
}
+
+#endif // SERD_SRC_TURTLE_H