diff options
author | David Robillard <d@drobilla.net> | 2021-09-04 15:03:11 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2022-01-28 21:57:07 -0500 |
commit | 1fb84760a8230637a806e8e83410fc7fb6d446d2 (patch) | |
tree | 47ec80aef823da8d5c840cd053e4189dcabc8258 | |
parent | b3892cb6e4963e1bbeb346a8124101b7c3cf379b (diff) | |
download | serd-1fb84760a8230637a806e8e83410fc7fb6d446d2.tar.gz serd-1fb84760a8230637a806e8e83410fc7fb6d446d2.tar.bz2 serd-1fb84760a8230637a806e8e83410fc7fb6d446d2.zip |
Add "contextual" output option
This is mainly for developer or power-user cases, where one wants to look at
some data for investigation or debugging. In such cases, it's common for the
set of prefixes to be implicitly known (because they are baked into the
application, for example), so printing them just produces a large amount of
redundant noise.
That said, it can also be useful programmatically, because it allows several
snippets to be written independently and ultimately concatenated (with a header
to define the prefixes) without redundancy.
-rw-r--r-- | doc/serd-filter.1 | 1 | ||||
-rw-r--r-- | doc/serd-pipe.1 | 10 | ||||
-rw-r--r-- | doc/serd-sort.1 | 1 | ||||
-rw-r--r-- | include/serd/serd.h | 11 | ||||
-rw-r--r-- | src/writer.c | 4 | ||||
-rw-r--r-- | test/meson.build | 8 | ||||
-rwxr-xr-x | test/test_contextual.py | 46 | ||||
-rw-r--r-- | tools/console.c | 1 | ||||
-rw-r--r-- | tools/serd-filter.c | 3 | ||||
-rw-r--r-- | tools/serd-pipe.c | 2 |
10 files changed, 83 insertions, 4 deletions
diff --git a/doc/serd-filter.1 b/doc/serd-filter.1 index 983c1969..0f09d1d4 100644 --- a/doc/serd-filter.1 +++ b/doc/serd-filter.1 @@ -85,6 +85,7 @@ Output syntax or option: .Cm TriG , .Cm Turtle , .Cm ascii , +.Cm contextual , .Cm expanded , .Cm verbatim , .Cm terse , diff --git a/doc/serd-pipe.1 b/doc/serd-pipe.1 index c7f77c9e..28f1373d 100644 --- a/doc/serd-pipe.1 +++ b/doc/serd-pipe.1 @@ -170,7 +170,7 @@ can be .Cm Turtle , or one of the following options: .Pp -.Bl -tag -width "QverbatimQ" -compact -offset indent +.Bl -tag -width "QcontextualQ" -compact -offset indent .It Cm ascii Escape all non-ASCII characters. Normally, text is written in UTF-8. @@ -178,6 +178,14 @@ This flag will escape non-ASCII characters in text as Unicode code points like .Li \eU00B7 or .Li \eU0001F600 . .Pp +.It Cm contextual +Suppress writing directives that describe the context. +Normally when writing Turtle or TriG, +a document will have a header that defines all the prefixes used in the input. +This flag will disable writing those directives, +so the output is a document fragment with an implicit context. +This can be useful for writing output intended for humans. +.Pp .It Cm expanded Write expanded URIs instead of prefixed names. .Pp diff --git a/doc/serd-sort.1 b/doc/serd-sort.1 index 350d70cd..3249fbbc 100644 --- a/doc/serd-sort.1 +++ b/doc/serd-sort.1 @@ -72,6 +72,7 @@ Output syntax or option: .Cm TriG , .Cm Turtle , .Cm ascii , +.Cm contextual , .Cm expanded , .Cm verbatim , .Cm terse , diff --git a/include/serd/serd.h b/include/serd/serd.h index 6324c06e..9eb84a6a 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -2589,7 +2589,16 @@ typedef enum { This disables the special "a" syntax in Turtle and TriG. */ - SERD_WRITE_RDF_TYPE = 1u << 5u + SERD_WRITE_RDF_TYPE = 1u << 5u, + + /** + Suppress writing directives that describe the context. + + This writes data as usual, but suppresses writing `prefix` directives in + Turtle and TriG. 
The resulting output is a fragment of a document with + implicit context, so it will only be readable in a suitable environment. + */ + SERD_WRITE_CONTEXTUAL = 1u << 6u } SerdWriterFlag; /// Bitwise OR of SerdWriterFlag values diff --git a/src/writer.c b/src/writer.c index a9141f78..7cb28d0a 100644 --- a/src/writer.c +++ b/src/writer.c @@ -1416,6 +1416,10 @@ serd_writer_set_prefix(SerdWriter* writer, reset_context(writer, true); } + if (writer->flags & SERD_WRITE_CONTEXTUAL) { + return st; + } + TRY(st, esink("@prefix ", 8, writer)); TRY(st, esink(serd_node_string(name), name->length, writer)); TRY(st, esink(": <", 3, writer)); diff --git a/test/meson.build b/test/meson.build index 37bae479..ae66ba37 100644 --- a/test/meson.build +++ b/test/meson.build @@ -236,6 +236,14 @@ if is_variable('serd_pipe') env: test_env, should_fail: true, suite: ['tools', 'pipe', 'output']) + + # Write options + + test('contextual', files('test_contextual.py'), + args: pipe_test_script_args + files('../serd.ttl'), + env: test_env, + suite: ['tools', 'pipe', 'output']) + endif # Test specifics for serd-sort diff --git a/test/test_contextual.py b/test/test_contextual.py new file mode 100755 index 00000000..0902fa19 --- /dev/null +++ b/test/test_contextual.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 + +"""Test writing with -O contextual (SERD_WRITE_CONTEXTUAL).""" + +import argparse +import sys +import shlex +import subprocess +import tempfile + +parser = argparse.ArgumentParser(description=__doc__) + +parser.add_argument("--tool", default="tools/serd-pipe", help="executable") +parser.add_argument("--wrapper", default="", help="executable wrapper") +parser.add_argument("input", default="", help="input file") + +args = parser.parse_args(sys.argv[1:]) +command = shlex.split(args.wrapper) + [ + args.tool, + "-O", + "turtle", + "-O", + "contextual", + args.input, +] + +DOCUMENT = "<{0}s> <{0}p> <{0}o> .".format("http://example.org/") + +with tempfile.TemporaryFile() as out: + proc = 
subprocess.run( + command, + check=False, + encoding="utf-8", + input=DOCUMENT, + stdout=out, + stderr=subprocess.PIPE, + ) + + assert proc.returncode == 0 + assert args.wrapper or len(proc.stderr) == 0 + + out.seek(0) + lines = out.readlines() + + for line in lines: + assert "@prefix" not in line.decode("utf-8") diff --git a/tools/console.c b/tools/console.c index 30c57575..ab36364d 100644 --- a/tools/console.c +++ b/tools/console.c @@ -220,6 +220,7 @@ serd_set_output_option(const SerdStringView name, {"terse", SERD_WRITE_TERSE}, {"lax", SERD_WRITE_LAX}, {"rdf_type", SERD_WRITE_RDF_TYPE}, + {"contextual", SERD_WRITE_CONTEXTUAL}, {NULL, SERD_WRITE_ASCII}, }; diff --git a/tools/serd-filter.c b/tools/serd-filter.c index 147bf51d..ed22373c 100644 --- a/tools/serd-filter.c +++ b/tools/serd-filter.c @@ -194,7 +194,8 @@ print_usage(const char* const name, const bool error) " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n" " or option (lax/variables/relative/global/generated).\n" " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n" - " or option (ascii/expanded/verbatim/terse/lax).\n" + " or option " + "(ascii/contextual/expanded/verbatim/terse/lax).\n" " -V Display version information and exit.\n" " -f PATTERN_FILE Read pattern from PATTERN_FILE instead.\n" " -h Display this help and exit.\n" diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c index 25607d15..879a56b5 100644 --- a/tools/serd-pipe.c +++ b/tools/serd-pipe.c @@ -106,7 +106,7 @@ print_usage(const char* const name, const bool error) " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n" " or option (lax/variables/relative/global/generated).\n" " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n" - " or option (ascii/expanded/verbatim/terse/lax).\n" + " or option (ascii/contextual/expanded/verbatim/terse/lax).\n" " -R ROOT_URI Keep relative URIs within ROOT_URI.\n" " -V Display version information and exit.\n" " -b BYTES I/O block size.\n" |