diff options
author | David Robillard <d@drobilla.net> | 2023-05-04 11:32:35 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:08 -0500 |
commit | fa4c14acdeeabaf7aee0304a57ae7878ecea1776 (patch) | |
tree | 5b43e86ecac55c9cee0056a2ce7a94c0f01aecbc | |
parent | fac7901dca0b4821582c6062600d37ac987082d9 (diff) | |
download | serd-fa4c14acdeeabaf7aee0304a57ae7878ecea1776.tar.gz serd-fa4c14acdeeabaf7aee0304a57ae7878ecea1776.tar.bz2 serd-fa4c14acdeeabaf7aee0304a57ae7878ecea1776.zip |
Add a reader flag to disable generated blank label avoidance
-rw-r--r-- | doc/man/serd-pipe.1 | 8 |
-rw-r--r-- | include/serd/reader.h | 15 |
-rw-r--r-- | src/read_ntriples.c | 15 |
-rw-r--r-- | test/meson.build | 1 |
-rw-r--r-- | tools/console.c | 1 |
5 files changed, 30 insertions, 10 deletions
diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1 index 0ce40dbd..a986c570 100644 --- a/doc/man/serd-pipe.1 +++ b/doc/man/serd-pipe.1 @@ -111,6 +111,14 @@ so that URI references and blank nodes are passed to the sink exactly as they ar Note that this does not apply to CURIEs, since serd deliberately does not have a way to represent CURIE nodes. A bad namespace prefix is considered a syntax error. +.It Cm generated +Read seemingly generated blank node labels exactly without adjusting them. +Normally, blank node labels like +.Li b123 +are adapted to avoid potential clashes with generated ones. +This flag disables that, +so such labels will be passed through exactly as they are in the input. +Note that this may corrupt the output by merging distinct blank nodes. .El .It Fl O Ar syntax Set an output syntax or option. diff --git a/include/serd/reader.h b/include/serd/reader.h index e25565cb..d62428cf 100644 --- a/include/serd/reader.h +++ b/include/serd/reader.h @@ -67,6 +67,21 @@ typedef enum { a syntax error. */ SERD_READ_VERBATIM = 1U << 2U, + + /** + Read generated blank node labels exactly without adjusting them. + + Normally, the reader will adapt blank node labels in the input that clash + with its scheme for generating new ones, for example mapping "_:b123" to + "_:B123". This flag disables that, so that blank node labels are passed + to the sink exactly as they are in the input. + + Note that this flag should be used carefully, since it can result in data + corruption. Specifically, if the input is a syntax like Turtle with + anonymous nodes, the generated IDs for those nodes may clash with IDs from + the input document. 
+ */ + SERD_READ_GENERATED = 1U << 3U, } SerdReaderFlag; /// Bitwise OR of SerdReaderFlag values diff --git a/src/read_ntriples.c b/src/read_ntriples.c index 7a43e4c2..57b1e7be 100644 --- a/src/read_ntriples.c +++ b/src/read_ntriples.c @@ -17,7 +17,6 @@ #include "serd/caret.h" #include "serd/sink.h" #include "serd/statement.h" -#include "serd/syntax.h" #include <assert.h> #include <stdbool.h> @@ -237,22 +236,18 @@ read_STRING_LITERAL(SerdReader* const reader, return tolerate_status(reader, st) ? SERD_SUCCESS : st; } -static bool -avoid_blank_clashes(const SerdReader* const reader) -{ - return (reader->syntax == SERD_TURTLE || reader->syntax == SERD_TRIG) && - !(reader->flags & SERD_READ_VERBATIM); -} - static SerdStatus adjust_blank_id(SerdReader* const reader, char* const buf) { - if (avoid_blank_clashes(reader) && is_digit(buf[reader->bprefix_len + 1])) { + if (!(reader->flags & SERD_READ_GENERATED) && + is_digit(buf[reader->bprefix_len + 1])) { const char tag = buf[reader->bprefix_len]; if (tag == 'b') { - buf[reader->bprefix_len] = 'B'; // Prevent clash + // Presumably generated ID like b123 in the input, adjust to B123 + buf[reader->bprefix_len] = 'B'; reader->seen_genid = true; } else if (tag == 'B' && reader->seen_genid) { + // We've seen both b123 and B123 styles, abort due to possible clashes return r_err(reader, SERD_BAD_LABEL, "found both 'b' and 'B' blank IDs, prefix required"); diff --git a/test/meson.build b/test/meson.build index 84ae7525..0092af4b 100644 --- a/test/meson.build +++ b/test/meson.build @@ -504,6 +504,7 @@ test_suites = { files('extra/prefix/manifest.ttl'), ns_serdtest + 'prefix/', '--', + ['-I', 'generated'], ['-p', 'test'], ], 'prefix_remove': [ diff --git a/tools/console.c b/tools/console.c index 56464696..1a2cb46c 100644 --- a/tools/console.c +++ b/tools/console.c @@ -99,6 +99,7 @@ serd_set_input_option(const SerdStringView name, {"lax", SERD_READ_LAX}, {"variables", SERD_READ_VARIABLES}, {"verbatim", SERD_READ_VERBATIM}, + 
{"generated", SERD_READ_GENERATED}, {NULL, SERD_READ_LAX}, }; |