From da7940afb82d8d1dd5321b311f27340f5702aea2 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Thu, 12 Aug 2021 13:23:59 -0400 Subject: Split SERD_READ_VERBATIM into two more precise flags Although the "verbatim" idea is nice and simple, more fine-grained control is necessary since these features (relative URI preservation and blank node label clash avoidance) are useful in different situations. --- include/serd/serd.h | 31 ++++++++++++++++++++----------- src/n3.c | 2 +- src/node_syntax.c | 2 +- tools/console.c | 3 ++- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/include/serd/serd.h b/include/serd/serd.h index 3f59ded6..c30c656b 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -2287,18 +2287,27 @@ typedef enum { SERD_READ_VARIABLES = 1u << 1u, /** - Read URIs and blank node labels exactly. + Read relative URI references exactly without resolving them. - Normally, the reader expands all relative URIs, and may adjust blank node - labels to avoid clashing with generated ones. This flag disables all of - this processing, so that URI references and blank nodes are passed to the - sink exactly as they are in the input. + Normally, the reader expands all relative URIs against the base URI. This + flag disables that, so that URI references are passed to the sink exactly + as they are in the input. + */ + SERD_READ_RELATIVE = 1u << 2u, + + /** + Read blank node labels without adding a prefix unique to the document. - Note that this does not apply to CURIEs, since serd deliberately does not - have a way to represent CURIE nodes. A bad namespace prefix is considered - a syntax error. + Normally, the reader adds a prefix like "f1", "f2", and so on, to blank + node labels, to separate the namespaces from separate input documents. + This flag disables that, so that blank node labels will be read without + any prefix added. + + Note that this flag should be used carefully, since it can result in data + corruption. Specifically, if data from separate documents parsed with + this flag is combined, the IDs from each document may clash. */ - SERD_READ_VERBATIM = 1u << 2u, + SERD_READ_GLOBAL = 1u << 3u, /** Read generated blank node labels exactly without adjusting them. @@ -2313,7 +2322,7 @@ typedef enum { anonymous nodes, the generated IDs for those nodes may clash with IDs from the input document. */ - SERD_READ_GENERATED = 1u << 3u, + SERD_READ_GENERATED = 1u << 4u, } SerdReaderFlag; /// Bitwise OR of SerdReaderFlag values @@ -2507,7 +2516,7 @@ typedef enum { potentially writem them as relative URI references. This flag disables that, so URI nodes are written exactly as they are received. - When fed by a reader with #SERD_READ_VERBATIM enabled, this will write URI + When fed by a reader with #SERD_READ_RELATIVE enabled, this will write URI references exactly as they are in the input. */ SERD_WRITE_VERBATIM = 1u << 2u, diff --git a/src/n3.c b/src/n3.c index 6d4210b4..ff142976 100644 --- a/src/n3.c +++ b/src/n3.c @@ -374,7 +374,7 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest) return st; } - return (reader->flags & SERD_READ_VERBATIM) + return (reader->flags & SERD_READ_RELATIVE) ? SERD_SUCCESS : resolve_IRIREF(reader, *dest, string_start_offset); } diff --git a/src/node_syntax.c b/src/node_syntax.c index edf5cbf5..1dadc7b8 100644 --- a/src/node_syntax.c +++ b/src/node_syntax.c @@ -58,7 +58,7 @@ serd_node_from_syntax_in(const char* const str, SerdReader* const reader = serd_reader_new(world, syntax, - SERD_READ_VERBATIM | SERD_READ_GENERATED, + SERD_READ_RELATIVE | SERD_READ_GLOBAL | SERD_READ_GENERATED, env, sink, 1024 + doc_len); diff --git a/tools/console.c b/tools/console.c index 2a2905d9..ea5fd7ee 100644 --- a/tools/console.c +++ b/tools/console.c @@ -69,7 +69,8 @@ serd_set_input_option(const SerdStringView name, static const InputOption input_options[] = { {"lax", SERD_READ_LAX}, {"variables", SERD_READ_VARIABLES}, - {"verbatim", SERD_READ_VERBATIM}, + {"relative", SERD_READ_RELATIVE}, + {"global", SERD_READ_GLOBAL}, {"generated", SERD_READ_GENERATED}, {NULL, SERD_READ_LAX}, }; -- cgit v1.2.1