about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2021-08-12 12:56:03 -0400
committer David Robillard <d@drobilla.net> 2022-01-28 21:57:07 -0500
commit 4339b6f9cb0da8a9d6519077f4a0ecc385cc382c (patch)
tree aad416437aaac4b0bb47354da7cd2070912f8050
parent 90828959c762b0e6d2c318032e714ca39e8e6edb (diff)
download serd-4339b6f9cb0da8a9d6519077f4a0ecc385cc382c.tar.gz
serd-4339b6f9cb0da8a9d6519077f4a0ecc385cc382c.tar.bz2
serd-4339b6f9cb0da8a9d6519077f4a0ecc385cc382c.zip
Add a reader flag to disable generated blank label avoidance
-rw-r--r-- include/serd/serd.h 15
-rw-r--r-- src/node_syntax.c 9
-rw-r--r-- src/read_ntriples.c 6
-rw-r--r-- tools/console.c 1
4 files changed, 27 insertions, 4 deletions
diff --git a/include/serd/serd.h b/include/serd/serd.h
index e99e4e6a..3f59ded6 100644
--- a/include/serd/serd.h
+++ b/include/serd/serd.h
@@ -2299,6 +2299,21 @@ typedef enum {
a syntax error.
*/
SERD_READ_VERBATIM = 1u << 2u,
+
+ /**
+ Read generated blank node labels exactly without adjusting them.
+
+ Normally, the reader will adapt blank node labels in the input that clash
+ with its scheme for generating new ones, for example mapping "_:b123" to
+ "_:B123". This flag disables that, so that blank node labels are passed
+ to the sink exactly as they are in the input.
+
+ Note that this flag should be used carefully, since it can result in data
+ corruption. Specifically, if the input is a syntax like Turtle with
+ anonymous nodes, the generated IDs for those nodes may clash with IDs from
+ the input document.
+ */
+ SERD_READ_GENERATED = 1u << 3u,
} SerdReaderFlag;
/// Bitwise OR of SerdReaderFlag values
diff --git a/src/node_syntax.c b/src/node_syntax.c
index 0c45e33a..edf5cbf5 100644
--- a/src/node_syntax.c
+++ b/src/node_syntax.c
@@ -55,8 +55,13 @@ serd_node_from_syntax_in(const char* const str,
SerdSink* const sink = serd_sink_new(&object, on_node_string_event, NULL);
SerdByteSource* const source = serd_byte_source_new_string(doc, NULL);
- SerdReader* const reader = serd_reader_new(
- world, syntax, SERD_READ_VERBATIM, env, sink, 1024 + doc_len);
+ SerdReader* const reader =
+ serd_reader_new(world,
+ syntax,
+ SERD_READ_VERBATIM | SERD_READ_GENERATED,
+ env,
+ sink,
+ 1024 + doc_len);
serd_reader_start(reader, source);
serd_reader_read_document(reader);
diff --git a/src/read_ntriples.c b/src/read_ntriples.c
index aa8f5468..08c489fe 100644
--- a/src/read_ntriples.c
+++ b/src/read_ntriples.c
@@ -256,13 +256,15 @@ read_STRING_LITERAL(SerdReader* const reader,
static SerdStatus
adjust_blank_id(SerdReader* const reader, char* const buf)
{
- if (!(reader->flags & SERD_READ_VERBATIM) &&
+ if (!(reader->flags & SERD_READ_GENERATED) &&
is_digit(buf[reader->bprefix_len + 1])) {
const char tag = buf[reader->bprefix_len];
if (tag == 'b') {
- buf[reader->bprefix_len] = 'B'; // Prevent clash
+ // Presumably generated ID like b123 in the input, adjust to B123
+ buf[reader->bprefix_len] = 'B';
reader->seen_genid = true;
} else if (tag == 'B' && reader->seen_genid) {
+ // We've seen both b123 and B123 styles, abort due to possible clashes
return r_err(reader,
SERD_ERR_ID_CLASH,
"found both `b' and `B' blank IDs, prefix required");
diff --git a/tools/console.c b/tools/console.c
index e13bf8a5..2a2905d9 100644
--- a/tools/console.c
+++ b/tools/console.c
@@ -70,6 +70,7 @@ serd_set_input_option(const SerdStringView name,
{"lax", SERD_READ_LAX},
{"variables", SERD_READ_VARIABLES},
{"verbatim", SERD_READ_VERBATIM},
+ {"generated", SERD_READ_GENERATED},
{NULL, SERD_READ_LAX},
};