about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- .reuse/dep5 2
-rw-r--r-- NEWS 3
-rw-r--r-- src/n3.c 148
-rw-r--r-- test/extra/good/manifest.ttl 7
-rw-r--r-- test/extra/good/test-boolish-prefix.nt 2
-rw-r--r-- test/extra/good/test-boolish-prefix.ttl 5
6 files changed, 98 insertions, 69 deletions
diff --git a/.reuse/dep5 b/.reuse/dep5
index 965881f7..f21e0ee1 100644
--- a/.reuse/dep5
+++ b/.reuse/dep5
@@ -14,7 +14,7 @@ Comment: Standard test suites from the W3C
License: BSD-3-Clause
Files: test/extra/*
-Copyright: 2011-2024 David Robillard <d@drobilla.net>
+Copyright: 2011-2025 David Robillard <d@drobilla.net>
Comment: Extra test suites for serd
License: BSD-3-Clause OR ISC
diff --git a/NEWS b/NEWS
index 3863ef01..fd086bbc 100644
--- a/NEWS
+++ b/NEWS
@@ -3,10 +3,11 @@ serd (0.32.5) unstable; urgency=medium
* Fix handling of some invalid EOF cases in lax mode
* Fix invalid characters in error messages
* Fix reading numbers with no space before the final dot
+ * Fix reading prefix names that start with "true." or "false."
* Refuse to write incoherent statements
* Remove project and version number from man page OS field
- -- David Robillard <d@drobilla.net> Tue, 25 Feb 2025 14:55:48 +0000
+ -- David Robillard <d@drobilla.net> Tue, 25 Feb 2025 22:59:32 +0000
serd (0.32.4) stable; urgency=medium
diff --git a/src/n3.c b/src/n3.c
index cd77eb3b..1c94a976 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -1,4 +1,4 @@
-// Copyright 2011-2023 David Robillard <d@drobilla.net>
+// Copyright 2011-2025 David Robillard <d@drobilla.net>
// SPDX-License-Identifier: ISC
#include "reader.h"
@@ -594,31 +594,38 @@ read_PN_LOCAL(SerdReader* const reader, const Ref dest, bool* const ate_dot)
// Read the remainder of a PN_PREFIX after some initial characters
static SerdStatus
-read_PN_PREFIX_tail(SerdReader* const reader, const Ref dest)
+read_PN_PREFIX_tail(SerdReader* const reader,
+ const Ref dest,
+ bool* const ate_dot)
{
- int c = 0;
- while ((c = peek_byte(reader)) > 0) { // Middle: (PN_CHARS | '.')*
+ SerdStatus st = SERD_SUCCESS;
+ int c = 0;
+ bool trailing_unescaped_dot = false;
+
+ while (!st && ((c = peek_byte(reader)) > 0)) { // Middle: (PN_CHARS | '.')*
if (c == '.') {
push_byte(reader, dest, eat_byte_safe(reader, c));
- } else if (read_PN_CHARS(reader, dest)) {
- break;
+ trailing_unescaped_dot = true;
+ } else if (!(st = read_PN_CHARS(reader, dest))) {
+ trailing_unescaped_dot = false;
}
}
- const SerdNode* const n = deref(reader, dest);
- if (n->buf[n->n_bytes - 1] == '.' && read_PN_CHARS(reader, dest)) {
- return r_err(reader, SERD_ERR_BAD_SYNTAX, "prefix ends with '.'\n");
+ if (trailing_unescaped_dot) {
+ SerdNode* const n = deref(reader, dest);
+ ((char*)n->buf)[--n->n_bytes] = '\0';
+ *ate_dot = true;
}
- return SERD_SUCCESS;
+ return st;
}
static SerdStatus
-read_PN_PREFIX(SerdReader* const reader, const Ref dest)
+read_PN_PREFIX(SerdReader* const reader, const Ref dest, bool* const ate_dot)
{
const SerdStatus st = read_PN_CHARS_BASE(reader, dest);
- return st ? st : read_PN_PREFIX_tail(reader, dest);
+ return st ? st : read_PN_PREFIX_tail(reader, dest, ate_dot);
}
static SerdStatus
@@ -751,15 +758,10 @@ read_IRIREF(SerdReader* const reader, Ref* const dest)
}
static SerdStatus
-read_PrefixedName(SerdReader* const reader,
- const Ref dest,
- const bool read_prefix,
- bool* const ate_dot)
+read_PrefixedName(SerdReader* const reader, const Ref dest, bool* const ate_dot)
{
SerdStatus st = SERD_SUCCESS;
- if (read_prefix) {
- TRY_FAILING(st, read_PN_PREFIX(reader, dest));
- }
+ TRY_FAILING(st, read_PN_PREFIX(reader, dest, ate_dot));
if (peek_byte(reader) != ':') {
return SERD_FAILURE;
@@ -855,7 +857,7 @@ read_iri(SerdReader* const reader, Ref* const dest, bool* const ate_dot)
}
*dest = push_node(reader, SERD_CURIE, "", 0);
- return read_PrefixedName(reader, *dest, true, ate_dot);
+ return read_PrefixedName(reader, *dest, ate_dot);
}
static SerdStatus
@@ -907,29 +909,30 @@ read_verb(SerdReader* const reader, Ref* const dest)
return read_IRIREF(reader, dest);
}
- /* Either a qname, or "a". Read the prefix first, and if it is in fact
- "a", produce that instead.
- */
- *dest = push_node(reader, SERD_CURIE, "", 0);
+ Ref p = push_node(reader, SERD_CURIE, "", 0);
- SerdStatus st = read_PN_PREFIX(reader, *dest);
- bool ate_dot = false;
- const SerdNode* const node = deref(reader, *dest);
- const int next = peek_byte(reader);
- if (!st && node->n_bytes == 1 && node->buf[0] == 'a' && next != ':' &&
- !is_PN_CHARS_BASE((uint32_t)next)) {
- pop_node(reader, *dest);
- *dest = push_node(reader, SERD_URI, NS_RDF "type", 47);
- return SERD_SUCCESS;
+ // Try to read as a prefixed name
+ bool ate_dot = false;
+ SerdStatus st = read_PrefixedName(reader, p, &ate_dot);
+
+ if (st == SERD_FAILURE) {
+ // Check if this is actually the "a" shorthand
+ const SerdNode* const node = deref(reader, p);
+ if (node->n_bytes == 1 && node->buf[0] == 'a') {
+ pop_node(reader, p);
+ p = push_node(reader, SERD_URI, NS_RDF "type", 47);
+ st = SERD_SUCCESS;
+ } else {
+ st = SERD_ERR_BAD_SYNTAX;
+ }
}
- if (st > SERD_FAILURE ||
- (st = read_PrefixedName(reader, *dest, false, &ate_dot)) || ate_dot) {
- *dest = pop_node(reader, *dest);
- st = st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX;
+ if (st) {
+ pop_node(reader, p);
return r_err(reader, st, "bad verb\n");
}
+ *dest = p;
return SERD_SUCCESS;
}
@@ -1065,6 +1068,40 @@ read_anon(SerdReader* const reader,
: SERD_ERR_BAD_SYNTAX;
}
+// Read a "named" object: a boolean literal or a prefixed name
+static SerdStatus
+read_named_object(SerdReader* const reader,
+ Ref* const dest,
+ Ref* const datatype,
+ bool* const ate_dot)
+{
+ static const char* const XSD_BOOLEAN = NS_XSD "boolean";
+ static const size_t XSD_BOOLEAN_LEN = 40;
+
+ // Try to read as a prefixed name
+ const Ref o = push_node(reader, SERD_CURIE, "", 0);
+ SerdStatus st = read_PrefixedName(reader, o, ate_dot);
+
+ if (st == SERD_FAILURE) {
+ // Check if this is actually a boolean literal
+ SerdNode* const node = deref(reader, o);
+ if ((node->n_bytes == 4 && !memcmp(node->buf, "true", 4)) ||
+ (node->n_bytes == 5 && !memcmp(node->buf, "false", 5))) {
+ node->type = SERD_LITERAL;
+ *datatype = push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN);
+ st = SERD_SUCCESS;
+ }
+ }
+
+ if (st) {
+ pop_node(reader, o);
+ return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected prefixed name\n");
+ }
+
+ *dest = o;
+ return SERD_SUCCESS;
+}
+
/* If emit is true: recurses, calling statement_sink for every statement
encountered, and leaves stack in original calling state (i.e. pops
everything it pushes). */
@@ -1074,9 +1111,6 @@ read_object(SerdReader* const reader,
const bool emit,
bool* const ate_dot)
{
- static const char* const XSD_BOOLEAN = NS_XSD "boolean";
- static const size_t XSD_BOOLEAN_LEN = 40;
-
#ifndef NDEBUG
const size_t orig_stack_size = reader->stack.size;
#endif
@@ -1084,7 +1118,6 @@ read_object(SerdReader* const reader,
SerdStatus st = SERD_FAILURE;
bool simple = (ctx->subject != 0);
- SerdNode* node = NULL;
Ref o = 0;
Ref datatype = 0;
Ref lang = 0;
@@ -1134,27 +1167,8 @@ read_object(SerdReader* const reader,
st = read_literal(reader, &o, &datatype, &lang, &flags, ate_dot);
break;
default:
- /* Either a boolean literal, or a qname. Read the prefix first, and if
- it is in fact a "true" or "false" literal, produce that instead.
- */
- o = push_node(reader, SERD_CURIE, "", 0);
- while (!read_PN_CHARS_BASE(reader, o)) {
- }
- node = deref(reader, o);
- if ((node->n_bytes == 4 && !memcmp(node->buf, "true", 4)) ||
- (node->n_bytes == 5 && !memcmp(node->buf, "false", 5))) {
- node->type = SERD_LITERAL;
- datatype = push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN);
- st = SERD_SUCCESS;
- } else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) {
- st = SERD_ERR_BAD_SYNTAX;
- } else {
- if ((st = read_PrefixedName(reader, o, false, ate_dot))) {
- st = st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX;
- pop_node(reader, o);
- return r_err(reader, st, "expected prefixed name\n");
- }
- }
+ // Either a boolean literal or a prefixed name
+ st = read_named_object(reader, &o, &datatype, ate_dot);
}
if (!st && simple && o) {
@@ -1431,12 +1445,12 @@ read_prefixID(SerdReader* const reader, const bool sparql, const bool token)
}
read_ws_star(reader);
- Ref name = push_node(reader, SERD_LITERAL, "", 0);
- TRY_FAILING(st, read_PN_PREFIX(reader, name));
-
- if (eat_byte_check(reader, ':') != ':') {
+ Ref name = push_node(reader, SERD_LITERAL, "", 0);
+ bool ate_dot = false;
+ TRY_FAILING(st, read_PN_PREFIX(reader, name, &ate_dot));
+ if (ate_dot || eat_byte_check(reader, ':') != ':') {
pop_node(reader, name);
- return SERD_ERR_BAD_SYNTAX;
+ return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected a prefix name\n");
}
read_ws_star(reader);
diff --git a/test/extra/good/manifest.ttl b/test/extra/good/manifest.ttl
index 2ac0ba59..2e3a64af 100644
--- a/test/extra/good/manifest.ttl
+++ b/test/extra/good/manifest.ttl
@@ -16,6 +16,7 @@
<#test-blank-node-statement>
<#test-blankdot>
<#test-bom>
+ <#test-boolish-prefix>
<#test-changing-base>
<#test-comment-whitespace>
<#test-cr>
@@ -125,6 +126,12 @@
mf:name "test-bom" ;
mf:result <test-bom.nt> .
+<#test-boolish-prefix>
+ a rdft:TestTurtleEval ;
+ mf:action <test-boolish-prefix.ttl> ;
+ mf:name "test-boolish-prefix" ;
+ mf:result <test-boolish-prefix.nt> .
+
<#test-changing-base>
a rdft:TestTurtleEval ;
mf:action <test-changing-base.ttl> ;
diff --git a/test/extra/good/test-boolish-prefix.nt b/test/extra/good/test-boolish-prefix.nt
new file mode 100644
index 00000000..d49eeab2
--- /dev/null
+++ b/test/extra/good/test-boolish-prefix.nt
@@ -0,0 +1,2 @@
+<http://example.org/s> <http://example.org/p> <http://example.org/falseish#o> .
+<http://example.org/s> <http://example.org/p> <http://example.org/trueish#o> .
diff --git a/test/extra/good/test-boolish-prefix.ttl b/test/extra/good/test-boolish-prefix.ttl
new file mode 100644
index 00000000..81ffdf11
--- /dev/null
+++ b/test/extra/good/test-boolish-prefix.ttl
@@ -0,0 +1,5 @@
+@prefix false.ish: <http://example.org/falseish#> .
+@prefix true.ish: <http://example.org/trueish#> .
+
+<http://example.org/s> <http://example.org/p> false.ish:o .
+<http://example.org/s> <http://example.org/p> true.ish:o .