about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: David Robillard <d@drobilla.net> 2023-05-04 11:32:35 -0400
committer: David Robillard <d@drobilla.net> 2023-12-02 18:49:08 -0500
commit: fa4c14acdeeabaf7aee0304a57ae7878ecea1776 (patch)
tree: 5b43e86ecac55c9cee0056a2ce7a94c0f01aecbc
parent: fac7901dca0b4821582c6062600d37ac987082d9 (diff)
download: serd-fa4c14acdeeabaf7aee0304a57ae7878ecea1776.tar.gz
serd-fa4c14acdeeabaf7aee0304a57ae7878ecea1776.tar.bz2
serd-fa4c14acdeeabaf7aee0304a57ae7878ecea1776.zip
Add a reader flag to disable generated blank label avoidance
-rw-r--r-- doc/man/serd-pipe.1 | 8
-rw-r--r-- include/serd/reader.h | 15
-rw-r--r-- src/read_ntriples.c | 15
-rw-r--r-- test/meson.build | 1
-rw-r--r-- tools/console.c | 1
5 files changed, 30 insertions, 10 deletions
diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1
index 0ce40dbd..a986c570 100644
--- a/doc/man/serd-pipe.1
+++ b/doc/man/serd-pipe.1
@@ -111,6 +111,14 @@ so that URI references and blank nodes are passed to the sink exactly as they ar
Note that this does not apply to CURIEs, since serd deliberately does not
have a way to represent CURIE nodes.
A bad namespace prefix is considered a syntax error.
+.It Cm generated
+Read seemingly generated blank node labels exactly without adjusting them.
+Normally, blank node labels like
+.Li b123
+are adapted to avoid potential clashes with generated ones.
+This flag disables that,
+so such labels will be passed through exactly as they are in the input.
+Note that this may corrupt the output by merging distinct blank nodes.
.El
.It Fl O Ar syntax
Set an output syntax or option.
diff --git a/include/serd/reader.h b/include/serd/reader.h
index e25565cb..d62428cf 100644
--- a/include/serd/reader.h
+++ b/include/serd/reader.h
@@ -67,6 +67,21 @@ typedef enum {
a syntax error.
*/
SERD_READ_VERBATIM = 1U << 2U,
+
+ /**
+ Read generated blank node labels exactly without adjusting them.
+
+ Normally, the reader will adapt blank node labels in the input that clash
+ with its scheme for generating new ones, for example mapping "_:b123" to
+ "_:B123". This flag disables that, so that blank node labels are passed
+ to the sink exactly as they are in the input.
+
+ Note that this flag should be used carefully, since it can result in data
+ corruption. Specifically, if the input is a syntax like Turtle with
+ anonymous nodes, the generated IDs for those nodes may clash with IDs from
+ the input document.
+ */
+ SERD_READ_GENERATED = 1U << 3U,
} SerdReaderFlag;
/// Bitwise OR of SerdReaderFlag values
diff --git a/src/read_ntriples.c b/src/read_ntriples.c
index 7a43e4c2..57b1e7be 100644
--- a/src/read_ntriples.c
+++ b/src/read_ntriples.c
@@ -17,7 +17,6 @@
#include "serd/caret.h"
#include "serd/sink.h"
#include "serd/statement.h"
-#include "serd/syntax.h"
#include <assert.h>
#include <stdbool.h>
@@ -237,22 +236,18 @@ read_STRING_LITERAL(SerdReader* const reader,
return tolerate_status(reader, st) ? SERD_SUCCESS : st;
}
-static bool
-avoid_blank_clashes(const SerdReader* const reader)
-{
- return (reader->syntax == SERD_TURTLE || reader->syntax == SERD_TRIG) &&
- !(reader->flags & SERD_READ_VERBATIM);
-}
-
static SerdStatus
adjust_blank_id(SerdReader* const reader, char* const buf)
{
- if (avoid_blank_clashes(reader) && is_digit(buf[reader->bprefix_len + 1])) {
+ if (!(reader->flags & SERD_READ_GENERATED) &&
+ is_digit(buf[reader->bprefix_len + 1])) {
const char tag = buf[reader->bprefix_len];
if (tag == 'b') {
- buf[reader->bprefix_len] = 'B'; // Prevent clash
+ // Presumably generated ID like b123 in the input, adjust to B123
+ buf[reader->bprefix_len] = 'B';
reader->seen_genid = true;
} else if (tag == 'B' && reader->seen_genid) {
+ // We've seen both b123 and B123 styles, abort due to possible clashes
return r_err(reader,
SERD_BAD_LABEL,
"found both 'b' and 'B' blank IDs, prefix required");
diff --git a/test/meson.build b/test/meson.build
index 84ae7525..0092af4b 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -504,6 +504,7 @@ test_suites = {
files('extra/prefix/manifest.ttl'),
ns_serdtest + 'prefix/',
'--',
+ ['-I', 'generated'],
['-p', 'test'],
],
'prefix_remove': [
diff --git a/tools/console.c b/tools/console.c
index 56464696..1a2cb46c 100644
--- a/tools/console.c
+++ b/tools/console.c
@@ -99,6 +99,7 @@ serd_set_input_option(const SerdStringView name,
{"lax", SERD_READ_LAX},
{"variables", SERD_READ_VARIABLES},
{"verbatim", SERD_READ_VERBATIM},
+ {"generated", SERD_READ_GENERATED},
{NULL, SERD_READ_LAX},
};