From 3ea3143632e3577ac5794faed1141e460a11a9fb Mon Sep 17 00:00:00 2001 From: David Robillard Date: Thu, 12 Aug 2021 13:23:59 -0400 Subject: Split SERD_READ_VERBATIM into two more precise flags Although the "verbatim" idea is nice and simple, more fine-grained control is necessary since these features (relative URI preservation and blank node label clash avoidance) are useful in different situations. --- doc/man/serd-pipe.1 | 24 ++++++++++++++++-------- include/serd/reader.h | 29 +++++++++++++++++++---------- src/read_turtle.c | 2 +- test/meson.build | 3 ++- tools/console.c | 3 ++- 5 files changed, 40 insertions(+), 21 deletions(-) diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1 index a986c570..056d236f 100644 --- a/doc/man/serd-pipe.1 +++ b/doc/man/serd-pipe.1 @@ -103,14 +103,11 @@ Variables can be written in SPARQL style, for example .Li ?var or .Li $var . -.It Cm verbatim -Normally, the reader expands all relative URIs, -and may adjust blank node labels to avoid clashing with generated ones. -This flag disables all of this processing, -so that URI references and blank nodes are passed to the sink exactly as they are in the input. -Note that this does not apply to CURIEs, since serd deliberately does not -have a way to represent CURIE nodes. -A bad namespace prefix is considered a syntax error. +.It Cm relative +Read relative URI references exactly without resolving them. +Normally, all relative URIs are expanded against the base URI when reading. +This flag disables that, +so URI references will be passed through exactly as they are in the input. .It Cm generated Read seemingly generated blank node labels exactly without adjusting them. Normally, blank node labels like @@ -119,6 +116,17 @@ are adapted to avoid potential clashes with generated ones. This flag disables that, so such labels will be passed through exactly as they are in the input. Note that this may corrupt the output by merging distinct blank nodes. +.It Cm global +Assume a clean global namespace for blank node labels, +and do not automatically add prefixes. +Normally, +a prefix like +.Li f1 +is added to blank node labels when reading multiple files, +to prevent labels in different files from clashing. +This option disables that, +so blank node labels will be passed through without any added prefix. +Note that this may corrupt the output by merging distinct blank nodes. .El .It Fl O Ar syntax Set an output syntax or option. diff --git a/include/serd/reader.h b/include/serd/reader.h index d62428cf..34af6d04 100644 --- a/include/serd/reader.h +++ b/include/serd/reader.h @@ -55,18 +55,27 @@ typedef enum { SERD_READ_VARIABLES = 1U << 1U, /** - Read URIs and blank node labels exactly. + Read relative URI references exactly without resolving them. - Normally, the reader expands all relative URIs, and may adjust blank node - labels to avoid clashing with generated ones. This flag disables all of - this processing, so that URI references and blank nodes are passed to the - sink exactly as they are in the input. + Normally, the reader expands all relative URIs against the base URI. This + flag disables that, so that URI references are passed to the sink exactly + as they are in the input. + */ + SERD_READ_RELATIVE = 1U << 2U, + + /** + Read blank node labels without adding a prefix unique to the document. - Note that this does not apply to CURIEs, since serd deliberately does not - have a way to represent CURIE nodes. A bad namespace prefix is considered - a syntax error. + Normally, the reader adds a prefix like "f1", "f2", and so on, to blank + node labels, to separate the namespaces from separate input documents. + This flag disables that, so that blank node labels will be read without + any prefix added. + + Note that this flag should be used carefully, since it can result in data + corruption. Specifically, if data from separate documents parsed with + this flag is combined, the IDs from each document may clash. */ - SERD_READ_VERBATIM = 1U << 2U, + SERD_READ_GLOBAL = 1U << 3U, /** Read generated blank node labels exactly without adjusting them. @@ -81,7 +90,7 @@ typedef enum { anonymous nodes, the generated IDs for those nodes may clash with IDs from the input document. */ - SERD_READ_GENERATED = 1U << 3U, + SERD_READ_GENERATED = 1U << 4U, } SerdReaderFlag; /// Bitwise OR of SerdReaderFlag values diff --git a/src/read_turtle.c b/src/read_turtle.c index f85c956a..d8a5de3e 100644 --- a/src/read_turtle.c +++ b/src/read_turtle.c @@ -372,7 +372,7 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest) return st; } - return (reader->flags & SERD_READ_VERBATIM) + return (reader->flags & SERD_READ_RELATIVE) ? SERD_SUCCESS : resolve_IRIREF(reader, *dest, string_start_offset); } diff --git a/test/meson.build b/test/meson.build index 0092af4b..2de2348f 100644 --- a/test/meson.build +++ b/test/meson.build @@ -457,7 +457,8 @@ test_suites = { files('extra/perfect/manifest.ttl'), ns_serdtest + 'perfect/', '--', - ['-I', 'verbatim'], + ['-I', 'global'], + ['-I', 'relative'], ['-O', 'verbatim'], ], 'full': [ diff --git a/tools/console.c b/tools/console.c index 1a2cb46c..94c6dc79 100644 --- a/tools/console.c +++ b/tools/console.c @@ -98,7 +98,8 @@ serd_set_input_option(const SerdStringView name, static const InputOption input_options[] = { {"lax", SERD_READ_LAX}, {"variables", SERD_READ_VARIABLES}, - {"verbatim", SERD_READ_VERBATIM}, + {"relative", SERD_READ_RELATIVE}, + {"global", SERD_READ_GLOBAL}, {"generated", SERD_READ_GENERATED}, {NULL, SERD_READ_LAX}, }; -- cgit v1.2.1