aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2021-09-04 15:03:11 -0400
committerDavid Robillard <d@drobilla.net>2022-01-28 21:57:07 -0500
commit1fb84760a8230637a806e8e83410fc7fb6d446d2 (patch)
tree47ec80aef823da8d5c840cd053e4189dcabc8258
parentb3892cb6e4963e1bbeb346a8124101b7c3cf379b (diff)
downloadserd-1fb84760a8230637a806e8e83410fc7fb6d446d2.tar.gz
serd-1fb84760a8230637a806e8e83410fc7fb6d446d2.tar.bz2
serd-1fb84760a8230637a806e8e83410fc7fb6d446d2.zip
Add "contextual" output option
This is mainly for developer or power-user cases, where one wants to look at some data for investigation or debugging. In such cases, it's common for the set of prefixes to be implicitly known (because they are baked in to the application, for example), so printing them just produces a large amount of redundant noise. That said, it can also be useful programmatically, because it allows several snippets to be written independently and ultimately concatenated (with a header to define the prefixes) without redundancy.
-rw-r--r--doc/serd-filter.11
-rw-r--r--doc/serd-pipe.110
-rw-r--r--doc/serd-sort.11
-rw-r--r--include/serd/serd.h11
-rw-r--r--src/writer.c4
-rw-r--r--test/meson.build8
-rwxr-xr-xtest/test_contextual.py46
-rw-r--r--tools/console.c1
-rw-r--r--tools/serd-filter.c3
-rw-r--r--tools/serd-pipe.c2
10 files changed, 83 insertions, 4 deletions
diff --git a/doc/serd-filter.1 b/doc/serd-filter.1
index 983c1969..0f09d1d4 100644
--- a/doc/serd-filter.1
+++ b/doc/serd-filter.1
@@ -85,6 +85,7 @@ Output syntax or option:
.Cm TriG ,
.Cm Turtle ,
.Cm ascii ,
+.Cm contextual ,
.Cm expanded ,
.Cm verbatim ,
.Cm terse ,
diff --git a/doc/serd-pipe.1 b/doc/serd-pipe.1
index c7f77c9e..28f1373d 100644
--- a/doc/serd-pipe.1
+++ b/doc/serd-pipe.1
@@ -170,7 +170,7 @@ can be
.Cm Turtle ,
or one of the following options:
.Pp
-.Bl -tag -width "QverbatimQ" -compact -offset indent
+.Bl -tag -width "QcontextualQ" -compact -offset indent
.It Cm ascii
Escape all non-ASCII characters.
Normally, text is written in UTF-8.
@@ -178,6 +178,14 @@ This flag will escape non-ASCII characters in text as Unicode code points like
.Li \eU00B7 or
.Li \eU0001F600 .
.Pp
+.It Cm contextual
+Suppress writing directives that describe the context.
+Normally when writing Turtle or TriG,
+a document will have a header that defines all the prefixes used in the input.
+This flag will disable writing those directives,
+so the output is a document fragment with an implicit context.
+This can be useful for writing output intended for humans.
+.Pp
.It Cm expanded
Write expanded URIs instead of prefixed names.
.Pp
diff --git a/doc/serd-sort.1 b/doc/serd-sort.1
index 350d70cd..3249fbbc 100644
--- a/doc/serd-sort.1
+++ b/doc/serd-sort.1
@@ -72,6 +72,7 @@ Output syntax or option:
.Cm TriG ,
.Cm Turtle ,
.Cm ascii ,
+.Cm contextual ,
.Cm expanded ,
.Cm verbatim ,
.Cm terse ,
diff --git a/include/serd/serd.h b/include/serd/serd.h
index 6324c06e..9eb84a6a 100644
--- a/include/serd/serd.h
+++ b/include/serd/serd.h
@@ -2589,7 +2589,16 @@ typedef enum {
This disables the special "a" syntax in Turtle and TriG.
*/
- SERD_WRITE_RDF_TYPE = 1u << 5u
+ SERD_WRITE_RDF_TYPE = 1u << 5u,
+
+ /**
+ Suppress writing directives that describe the context.
+
+ This writes data as usual, but suppresses writing `prefix` directives in
+ Turtle and TriG. The resulting output is a fragment of a document with
+ implicit context, so it will only be readable in a suitable environment.
+ */
+ SERD_WRITE_CONTEXTUAL = 1u << 6u
} SerdWriterFlag;
/// Bitwise OR of SerdWriterFlag values
diff --git a/src/writer.c b/src/writer.c
index a9141f78..7cb28d0a 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -1416,6 +1416,10 @@ serd_writer_set_prefix(SerdWriter* writer,
reset_context(writer, true);
}
+ if (writer->flags & SERD_WRITE_CONTEXTUAL) {
+ return st;
+ }
+
TRY(st, esink("@prefix ", 8, writer));
TRY(st, esink(serd_node_string(name), name->length, writer));
TRY(st, esink(": <", 3, writer));
diff --git a/test/meson.build b/test/meson.build
index 37bae479..ae66ba37 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -236,6 +236,14 @@ if is_variable('serd_pipe')
env: test_env,
should_fail: true,
suite: ['tools', 'pipe', 'output'])
+
+ # Write options
+
+ test('contextual', files('test_contextual.py'),
+ args: pipe_test_script_args + files('../serd.ttl'),
+ env: test_env,
+ suite: ['tools', 'pipe', 'output'])
+
endif
# Test specifics for serd-sort
diff --git a/test/test_contextual.py b/test/test_contextual.py
new file mode 100755
index 00000000..0902fa19
--- /dev/null
+++ b/test/test_contextual.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+
+"""Test writing with -O contextual (SERD_WRITE_CONTEXTUAL)."""
+
+import argparse
+import sys
+import shlex
+import subprocess
+import tempfile
+
+parser = argparse.ArgumentParser(description=__doc__)
+
+parser.add_argument("--tool", default="tools/serd-pipe", help="executable")
+parser.add_argument("--wrapper", default="", help="executable wrapper")
+parser.add_argument("input", default="", help="input file")
+
+args = parser.parse_args(sys.argv[1:])
+command = shlex.split(args.wrapper) + [
+ args.tool,
+ "-O",
+ "turtle",
+ "-O",
+ "contextual",
+ args.input,
+]
+
+DOCUMENT = "<{0}s> <{0}p> <{0}o> .".format("http://example.org/")
+
+with tempfile.TemporaryFile() as out:
+ proc = subprocess.run(
+ command,
+ check=False,
+ encoding="utf-8",
+ input=DOCUMENT,
+ stdout=out,
+ stderr=subprocess.PIPE,
+ )
+
+ assert proc.returncode == 0
+ assert args.wrapper or len(proc.stderr) == 0
+
+ out.seek(0)
+ lines = out.readlines()
+
+ for line in lines:
+ assert "@prefix" not in line.decode("utf-8")
diff --git a/tools/console.c b/tools/console.c
index 30c57575..ab36364d 100644
--- a/tools/console.c
+++ b/tools/console.c
@@ -220,6 +220,7 @@ serd_set_output_option(const SerdStringView name,
{"terse", SERD_WRITE_TERSE},
{"lax", SERD_WRITE_LAX},
{"rdf_type", SERD_WRITE_RDF_TYPE},
+ {"contextual", SERD_WRITE_CONTEXTUAL},
{NULL, SERD_WRITE_ASCII},
};
diff --git a/tools/serd-filter.c b/tools/serd-filter.c
index 147bf51d..ed22373c 100644
--- a/tools/serd-filter.c
+++ b/tools/serd-filter.c
@@ -194,7 +194,8 @@ print_usage(const char* const name, const bool error)
" -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n"
" or option (lax/variables/relative/global/generated).\n"
" -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n"
- " or option (ascii/expanded/verbatim/terse/lax).\n"
+ " or option "
+ "(ascii/contextual/expanded/verbatim/terse/lax).\n"
" -V Display version information and exit.\n"
" -f PATTERN_FILE Read pattern from PATTERN_FILE instead.\n"
" -h Display this help and exit.\n"
diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c
index 25607d15..879a56b5 100644
--- a/tools/serd-pipe.c
+++ b/tools/serd-pipe.c
@@ -106,7 +106,7 @@ print_usage(const char* const name, const bool error)
" -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n"
" or option (lax/variables/relative/global/generated).\n"
" -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n"
- " or option (ascii/expanded/verbatim/terse/lax).\n"
+ " or option (ascii/contextual/expanded/verbatim/terse/lax).\n"
" -R ROOT_URI Keep relative URIs within ROOT_URI.\n"
" -V Display version information and exit.\n"
" -b BYTES I/O block size.\n"