From dc01b7e301e91d0d7bfc358f569f4f3849471c52 Mon Sep 17 00:00:00 2001
From: David Robillard <d@drobilla.net>
Date: Sun, 1 Aug 2021 20:09:38 -0400
Subject: Collapse input and output options into a single flag

---
 doc/serdi.1            | 89 ++++++++++++++++++++++++++++++++++----------------
 test/meson.build       |  9 ++---
 test/run_test_suite.py | 13 +++++---
 tools/console.c        | 71 ++++++++++++++++++++++++++++++++++++++++
 tools/console.h        | 10 ++++++
 tools/serdi.c          | 36 ++++++++------------
 6 files changed, 168 insertions(+), 60 deletions(-)

diff --git a/doc/serdi.1 b/doc/serdi.1
index 99652738..698bec29 100644
--- a/doc/serdi.1
+++ b/doc/serdi.1
@@ -6,7 +6,7 @@
 .Nd read, filter, transform, and write RDF data
 .Sh SYNOPSIS
 .Nm serdi
-.Op Fl Cafhlmqtvx
+.Op Fl Cfhmqv
 .Op Fl F Ar pattern | Fl G Ar pattern
 .Op Fl I Ar base
 .Op Fl b Ar bytes
@@ -74,10 +74,6 @@ the URI of the file is automatically used as the base URI.
 This option can be used to override that,
 or to provide a base URI for input from stdin or a string.
 .Pp
-.It Fl a
-Write ASCII output.
-If this is enabled, all non-ASCII characters will be escaped, even if the output syntax allows them to be written in UTF-8.
-.Pp
 .It Fl b Ar bytes
 I/O block size.
 This is the number of bytes in a file that will be read or written at once.
@@ -102,15 +98,44 @@ since blank nodes will not be inlined.
 Print the command line options.
 .Pp
 .It Fl i Ar syntax
-Read input as
-.Ar syntax .
-Case is ignored, valid values are:
+Set an input syntax option.
+May be given multiple times.
+The case-insensitive
+.Ar syntax
+can be either a syntax name or an input syntax option.
+The supported syntaxes are
 .Dq NQuads ,
 .Dq NTriples ,
 .Dq TriG ,
 and
 .Dq Turtle .
 .Pp
+The supported input options are:
+.Pp
+.Bl -tag -width "QvariablesQ" -compact -offset indent
+.It Dq lax
+Tolerate invalid input where possible.
+Warnings will be printed on syntax errors,
+but parsing will attempt to continue.
+Note that data may be lost when using this option!
+.Pp
+.It Dq variables
+Support parsing variable nodes.
+Variables can be written in SPARQL style, for example
+.Dq ?var
+or
+.Dq $var .
+.Pp
+.It Dq verbatim
+Normally, the reader expands all relative URIs,
+and may adjust blank node labels to avoid clashing with generated ones.
+This option disables all of this processing,
+so that URI references and blank nodes are passed to the sink exactly as they are in the input.
+Note that this does not apply to CURIEs,
+since serd deliberately does not have a way to represent CURIE nodes.
+A bad namespace prefix is considered a syntax error.
+.El
+.Pp
 .It Fl k Ar bytes
 Parser stack size.
 For performance and security reasons, parsing is performed with a fixed-size stack.
@@ -119,11 +144,6 @@ If some data has very deep nesting or very large literal values,
 it may exceed the default amount of space,
 and this option can be used to increase it and allow the document to be parsed successfully.
 .Pp
-.It Fl l
-Lax (non-strict) parsing.
-If this is enabled, recoverable syntax errors will print a warning, but parsing will proceed starting at the next statement if possible.
-Note that data may be lost when using this option.
-.Pp
 .It Fl m
 Build a model in memory.
 This loads all of the input into memory before writing the output.
@@ -131,18 +151,39 @@ This will reorder statements and eliminate duplicates, at the cost of performanc
 When writing TriG or Turtle, this may enable better pretty-printing with more inline descriptions.
 .Pp
 .It Fl o Ar syntax
-Write output as
-.Ar syntax .
-Case is ignored, valid values are:
+Set an output syntax option.
+May be given multiple times.
+The case-insensitive
+.Ar syntax
+can be either a syntax name or an output syntax option.
+The supported syntaxes are
 .Dq empty ,
 .Dq NQuads ,
 .Dq NTriples ,
 .Dq TriG ,
 and
 .Dq Turtle .
-When
-.Dq empty
-is given, output is suppressed, so only errors will be printed.
+.Pp
+The supported output options are:
+.Pp
+.Bl -tag -width "QverbatimQ" -compact -offset indent
+.It Dq ascii
+Escape all non-ASCII characters.
+.Pp
+.It Dq expanded
+Write expanded URIs instead of prefixed names.
+.Pp
+.It Dq verbatim
+Write URI references exactly as they are in the input.
+This avoids resolving URIs and making them relative to the output base URI.
+.Pp
+.It Dq terse
+Write terser output without newlines.
+.Pp
+.It Dq lax
+Tolerate invalid UTF-8 by writing the replacement character when necessary.
+Note that data may be lost when using this option!
+.El
 .Pp
 .It Fl p Ar prefix
 Add
@@ -167,9 +208,6 @@ Parse
 .Ar string
 as input.
 .Pp
-.It Fl t
-Write terser output without newlines.
-.Pp
 .It Fl v
 Display version information and exit.
 .Pp
@@ -177,13 +215,6 @@ Display version information and exit.
 Write output to the given
 .Ar filename
 instead of stdout.
-.Pp
-.It Fl x
-Support parsing variable nodes.
-Variables can be written in SPARQL style, for example
-.Dq ?var
-or
-.Dq $var .
 .El
 .Sh EXIT STATUS
 .Nm
diff --git a/test/meson.build b/test/meson.build
index 2cf3f32c..95fa9587 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -247,12 +247,12 @@ if is_variable('serdi')
          timeout: 240)
   endforeach
 
-  ### The terse suite needs to be run with -t
+  ### The terse suite needs to be run with -o terse
   test('terse', run_test_suite,
        args: script_args + ['--osyntax', 'turtle',
                             files('terse/manifest.ttl'),
                             serd_base + 'terse/',
-                            '--', '-t'],
+                            '--', '-o', 'terse'],
        env: test_env,
        suite: ['rdf', 'serd'],
        timeout: 240)
@@ -260,7 +260,7 @@ if is_variable('serdi')
   manifest = files('pattern' / 'manifest.ttl')
   base_uri = serd_base + 'pattern' + '/'
   test('pattern', run_test_suite,
-       args: script_args + [manifest, base_uri, '--', '-x'],
+       args: script_args + [manifest, base_uri, '--', '-i', 'variables'],
        env: test_env,
        suite: ['rdf', 'serd'],
        timeout: 240)
@@ -287,7 +287,8 @@ if is_variable('serdi')
 
   ### ... and once with lax parsing to tolerate them
   test('lax.lax', run_test_suite,
-       args: script_args + [lax_manifest, lax_base_uri, '--', '-l'],
+       args: script_args + [lax_manifest, lax_base_uri, '--',
+                            '-i', 'lax', '-o', 'lax'],
        env: test_env,
        is_parallel: false,
        suite: ['rdf', 'serd'],
diff --git a/test/run_test_suite.py b/test/run_test_suite.py
index 0d865c1f..1c5a7cf3 100755
--- a/test/run_test_suite.py
+++ b/test/run_test_suite.py
@@ -26,7 +26,7 @@ def log_error(message):
 
 def test_osyntax_options(osyntax):
     if osyntax.lower() == "ntriples" or osyntax.lower() == "nquads":
-        return ["-a"]
+        return ["-o", "ascii"]
 
     return []
 
@@ -80,7 +80,8 @@ def test_thru(
             "foo",
             "-w",
             thru_path,
-            "-a",
+            "-o",
+            "ascii",
             "-I",
             base_uri,
             out_path,
@@ -243,9 +244,10 @@ def test_suite(
             test_path = os.path.join(test_dir, test_name)
 
             command = command_prefix + [
-                "-a",
                 "-o",
                 osyntax,
+                "-o",
+                "ascii",
                 "-I",
                 test_uri,
                 test_path,
@@ -305,9 +307,10 @@ def test_suite(
 
                     model_command = command_prefix + [
                         "-m",
-                        "-a",
                         "-o",
                         osyntax,
+                        "-o",
+                        "ascii",
                         "-w",
                         out_filename,
                         "-I",
@@ -365,7 +368,7 @@ def test_suite(
         if test_class.startswith(ns_rdftest):
             expected = (
                 1
-                if "-l" not in command_prefix and "Negative" in test_class
+                if "lax" not in command_prefix and "Negative" in test_class
                 else 0
             )
             run_tests(test_class, instances, expected, results)
diff --git a/tools/console.c b/tools/console.c
index df1bc2ff..339aca29 100644
--- a/tools/console.c
+++ b/tools/console.c
@@ -56,6 +56,77 @@ serd_print_version(const char* const program)
   return 0;
 }
 
+/// Set `syntax` or a flag in `flags` from an -i argument (case-insensitive)
+SerdStatus
+serd_set_input_option(const SerdStringView   name,
+                      SerdSyntax* const      syntax,
+                      SerdReaderFlags* const flags)
+{
+  typedef struct {
+    const char*    name;
+    SerdReaderFlag flag;
+  } InputOption;
+  static const InputOption input_options[] = {
+    {"lax", SERD_READ_LAX},
+    {"variables", SERD_READ_VARIABLES},
+    {"verbatim", SERD_READ_VERBATIM},
+    {NULL, SERD_READ_LAX}, // Sentinel: a NULL name ends the table
+  };
+
+  // Match whole names only: an n-limited compare alone accepts "" and prefixes
+  const SerdSyntax named_syntax = serd_syntax_by_name(name.buf);
+  if ((name.len == 5 && !serd_strncasecmp(name.buf, "empty", name.len)) ||
+      named_syntax != SERD_SYNTAX_EMPTY) {
+    *syntax = named_syntax;
+    return SERD_SUCCESS;
+  }
+
+  for (const InputOption* o = input_options; o->name; ++o) {
+    if (!serd_strncasecmp(o->name, name.buf, name.len) && !o->name[name.len]) {
+      *flags |= o->flag;
+      return SERD_SUCCESS;
+    }
+  }
+
+  return SERD_FAILURE;
+}
+
+/// Set `syntax` or a flag in `flags` from an -o argument (case-insensitive)
+SerdStatus
+serd_set_output_option(const SerdStringView   name,
+                       SerdSyntax* const      syntax,
+                       SerdWriterFlags* const flags)
+{
+  typedef struct {
+    const char*    name;
+    SerdWriterFlag flag;
+  } OutputOption;
+  static const OutputOption output_options[] = {
+    {"ascii", SERD_WRITE_ASCII},
+    {"expanded", SERD_WRITE_EXPANDED},
+    {"verbatim", SERD_WRITE_VERBATIM},
+    {"terse", SERD_WRITE_TERSE},
+    {"lax", SERD_WRITE_LAX},
+    {NULL, SERD_WRITE_ASCII}, // Sentinel: a NULL name ends the table
+  };
+
+  // Match whole names only: an n-limited compare alone accepts "" and prefixes
+  const SerdSyntax named_syntax = serd_syntax_by_name(name.buf);
+  if ((name.len == 5 && !serd_strncasecmp(name.buf, "empty", name.len)) ||
+      named_syntax != SERD_SYNTAX_EMPTY) {
+    *syntax = named_syntax;
+    return SERD_SUCCESS;
+  }
+
+  for (const OutputOption* o = output_options; o->name; ++o) {
+    if (!serd_strncasecmp(o->name, name.buf, name.len) && !o->name[name.len]) {
+      *flags |= o->flag;
+      return SERD_SUCCESS;
+    }
+  }
+  return SERD_FAILURE;
+}
+
 /// Wrapper for getc that is compatible with SerdReadFunc but faster than fread
 static size_t
 serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream)
diff --git a/tools/console.h b/tools/console.h
index 31076b24..5d174f1e 100644
--- a/tools/console.h
+++ b/tools/console.h
@@ -24,6 +24,16 @@ serd_set_stream_utf8_mode(FILE* stream);
 int
 serd_print_version(const char* program);
 
+SerdStatus
+serd_set_input_option(SerdStringView   name,
+                      SerdSyntax*      syntax,
+                      SerdReaderFlags* flags);
+
+SerdStatus
+serd_set_output_option(SerdStringView   name,
+                       SerdSyntax*      syntax,
+                       SerdWriterFlags* flags);
+
 SerdByteSource*
 serd_open_input(const char* filename, size_t block_size);
 
diff --git a/tools/serdi.c b/tools/serdi.c
index f46cab48..db1cfde0 100644
--- a/tools/serdi.c
+++ b/tools/serdi.c
@@ -45,24 +45,22 @@ print_usage(const char* const name, const bool error)
     "  -F PATTERN   Filter out statements that match PATTERN.\n"
     "  -G PATTERN   Only include statements matching PATTERN.\n"
     "  -I BASE_URI  Input base URI.\n"
-    "  -a           Write ASCII output if possible.\n"
     "  -b BYTES     I/O block size.\n"
     "  -c PREFIX    Chop PREFIX from matching blank node IDs.\n"
     "  -f           Fast and loose mode (possibly ugly output).\n"
     "  -h           Display this help and exit.\n"
-    "  -i SYNTAX    Input syntax: turtle/ntriples/trig/nquads.\n"
+    "  -i SYNTAX    Input syntax (turtle/ntriples/trig/nquads),\n"
+    "               or flag (lax/variables/verbatim).\n"
     "  -k BYTES     Parser stack size.\n"
-    "  -l           Lax (non-strict) parsing.\n"
     "  -m           Build a model in memory before writing.\n"
-    "  -o SYNTAX    Output syntax: empty/turtle/ntriples/nquads.\n"
+    "  -o SYNTAX    Output syntax (empty/turtle/ntriples/trig/nquads),\n"
+    "               or flag (ascii/expanded/verbatim/terse/lax).\n"
     "  -p PREFIX    Add PREFIX to blank node IDs.\n"
     "  -q           Suppress all output except data.\n"
     "  -r ROOT_URI  Keep relative URIs within ROOT_URI.\n"
     "  -s STRING    Parse STRING as input.\n"
-    "  -t           Write terser output without newlines.\n"
     "  -v           Display version information and exit.\n"
-    "  -w FILENAME  Write output to FILENAME instead of stdout.\n"
-    "  -x           Support parsing variable nodes like `?x'.\n";
+    "  -w FILENAME  Write output to FILENAME instead of stdout.\n";
 
   FILE* const os = error ? stderr : stdout;
   fprintf(os, "%s", error ? "\n" : "");
@@ -207,26 +205,17 @@ main(int argc, char** argv)
 
       if (opt == 'C') {
         canonical = true;
-      } else if (opt == 'a') {
-        writer_flags |= SERD_WRITE_ASCII;
       } else if (opt == 'f') {
         no_inline = true;
         writer_flags |= (SERD_WRITE_EXPANDED | SERD_WRITE_VERBATIM);
       } else if (opt == 'h') {
         return print_usage(prog, false);
-      } else if (opt == 'l') {
-        reader_flags |= SERD_READ_LAX;
-        writer_flags |= SERD_WRITE_LAX;
       } else if (argv[a][1] == 'm') {
         use_model = true;
       } else if (opt == 'q') {
         quiet = true;
-      } else if (opt == 't') {
-        writer_flags |= SERD_WRITE_TERSE;
       } else if (opt == 'v') {
         return serd_print_version(argv[0]);
-      } else if (opt == 'x') {
-        reader_flags |= SERD_READ_VARIABLES;
       } else if (argv[a][1] == 'F') {
         if (++a == argc) {
           return missing_arg(argv[0], 'F');
@@ -273,8 +262,9 @@ main(int argc, char** argv)
           return missing_arg(prog, 'i');
         }
 
-        if (!(input_syntax = serd_syntax_by_name(argv[a]))) {
-          return print_usage(prog, true);
+        if (serd_set_input_option(
+              SERD_STRING(argv[a]), &input_syntax, &reader_flags)) {
+          return print_usage(argv[0], true);
         }
         break;
       } else if (opt == 'k') {
@@ -291,16 +281,18 @@ main(int argc, char** argv)
         stack_size = (size_t)size;
         break;
       } else if (opt == 'o') {
-        osyntax_set = true;
         if (argv[a][o + 1] || ++a == argc) {
           return missing_arg(prog, 'o');
         }
 
-        if (!strcmp(argv[a], "empty")) {
-          output_syntax = SERD_SYNTAX_EMPTY;
-        } else if (!(output_syntax = serd_syntax_by_name(argv[a]))) {
+        if (serd_set_output_option(
+              SERD_STRING(argv[a]), &output_syntax, &writer_flags)) {
           return print_usage(argv[0], true);
         }
+        // Stay set once a syntax is chosen, so "-o empty -o ascii" keeps empty
+        osyntax_set = osyntax_set || output_syntax != SERD_SYNTAX_EMPTY ||
+                      !strcmp(argv[a], "empty");
+
         break;
       } else if (opt == 'p') {
         if (argv[a][o + 1] || ++a == argc) {
-- 
cgit v1.2.1