From 4339b6f9cb0da8a9d6519077f4a0ecc385cc382c Mon Sep 17 00:00:00 2001
From: David Robillard <d@drobilla.net>
Date: Thu, 12 Aug 2021 12:56:03 -0400
Subject: Add a reader flag to disable generated blank label avoidance

---
 include/serd/serd.h | 15 +++++++++++++++
 src/node_syntax.c   |  9 +++++++--
 src/read_ntriples.c |  6 ++++--
 tools/console.c     |  1 +
 4 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/include/serd/serd.h b/include/serd/serd.h
index e99e4e6a..3f59ded6 100644
--- a/include/serd/serd.h
+++ b/include/serd/serd.h
@@ -2299,6 +2299,21 @@ typedef enum {
      a syntax error.
   */
   SERD_READ_VERBATIM = 1u << 2u,
+
+  /**
+     Read generated-looking blank node labels exactly without adjusting them.
+
+     Normally, the reader will adapt blank node labels in the input that clash
+     with its scheme for generating new ones, for example mapping "_:b123" to
+     "_:B123".  This flag disables that, so that blank node labels are passed
+     to the sink exactly as they are in the input.
+
+     Note that this flag should be used carefully, since it can result in data
+     corruption.  Specifically, if the input is a syntax like Turtle with
+     anonymous nodes, the generated IDs for those nodes may clash with IDs from
+     the input document.
+  */
+  SERD_READ_GENERATED = 1u << 3u,
 } SerdReaderFlag;
 
 /// Bitwise OR of SerdReaderFlag values
diff --git a/src/node_syntax.c b/src/node_syntax.c
index 0c45e33a..edf5cbf5 100644
--- a/src/node_syntax.c
+++ b/src/node_syntax.c
@@ -55,8 +55,13 @@ serd_node_from_syntax_in(const char* const str,
   SerdSink* const  sink   = serd_sink_new(&object, on_node_string_event, NULL);
 
   SerdByteSource* const source = serd_byte_source_new_string(doc, NULL);
-  SerdReader* const     reader = serd_reader_new(
-    world, syntax, SERD_READ_VERBATIM, env, sink, 1024 + doc_len);
+  SerdReader* const     reader =
+    serd_reader_new(world,
+                    syntax,
+                    SERD_READ_VERBATIM | SERD_READ_GENERATED,
+                    env,
+                    sink,
+                    1024 + doc_len);
 
   serd_reader_start(reader, source);
   serd_reader_read_document(reader);
diff --git a/src/read_ntriples.c b/src/read_ntriples.c
index aa8f5468..08c489fe 100644
--- a/src/read_ntriples.c
+++ b/src/read_ntriples.c
@@ -256,13 +256,15 @@ read_STRING_LITERAL(SerdReader* const reader,
 static SerdStatus
 adjust_blank_id(SerdReader* const reader, char* const buf)
 {
-  if (!(reader->flags & SERD_READ_VERBATIM) &&
+  if (!(reader->flags & SERD_READ_GENERATED) &&
       is_digit(buf[reader->bprefix_len + 1])) {
     const char tag = buf[reader->bprefix_len];
     if (tag == 'b') {
-      buf[reader->bprefix_len] = 'B'; // Prevent clash
+      // Presumably generated ID like b123 in the input, adjust to B123
+      buf[reader->bprefix_len] = 'B';
       reader->seen_genid       = true;
     } else if (tag == 'B' && reader->seen_genid) {
+      // We've seen both b123 and B123 styles, abort due to possible clashes
       return r_err(reader,
                    SERD_ERR_ID_CLASH,
                    "found both `b' and `B' blank IDs, prefix required");
diff --git a/tools/console.c b/tools/console.c
index e13bf8a5..2a2905d9 100644
--- a/tools/console.c
+++ b/tools/console.c
@@ -70,6 +70,7 @@ serd_set_input_option(const SerdStringView   name,
     {"lax", SERD_READ_LAX},
     {"variables", SERD_READ_VARIABLES},
     {"verbatim", SERD_READ_VERBATIM},
+    {"generated", SERD_READ_GENERATED},
     {NULL, SERD_READ_LAX},
   };
 
-- 
cgit v1.2.1