From 0db18e483f11ac2f9518d96e137d217040ed1339 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 31 Mar 2023 20:32:26 -0400 Subject: Add "contextual" output option This is mainly for developer or power-user cases, where one wants to look at some data for investigation or debugging. In such cases, it's common for the set of prefixes to be implicitly known (because they are baked in to the application, for example), so printing them just produces a large amount of redundant noise. That said, it can also be useful programmatically, because it allows several snippets to be written independently and ultimately concatenated (with a header to define the prefixes) without redundancy. --- doc/man/serd-filter.1 | 1 + doc/man/serd-pipe.1 | 7 +++++++ doc/man/serd-sort.1 | 1 + include/serd/writer.h | 9 +++++++++ src/writer.c | 4 ++++ test/meson.build | 11 +++++++++++ test/test_contextual.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ tools/console.c | 1 + tools/serd-filter.c | 2 +- tools/serd-pipe.c | 2 +- tools/serd-sort.c | 2 +- 11 files changed, 88 insertions(+), 3 deletions(-) create mode 100755 test/test_contextual.py diff --git a/doc/man/serd-filter.1 b/doc/man/serd-filter.1 index ff076b97..08c851de 100644 --- a/doc/man/serd-filter.1 +++ b/doc/man/serd-filter.1 @@ -74,6 +74,7 @@ Output syntax or option: .Cm TriG , .Cm Turtle , .Cm ascii , +.Cm contextual , .Cm expanded , .Cm verbatim , .Cm terse , diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1 index a5dbd328..54e3a3b1 100644 --- a/doc/man/serd-pipe.1 +++ b/doc/man/serd-pipe.1 @@ -168,6 +168,13 @@ or an option: .Bl -tag -width 3n .It Cm ascii Escape all non-ASCII characters. +.It Cm contextual +Suppress writing directives that describe the context. +Normally when writing Turtle or TriG, +a document will have a header that defines all the prefixes used in the input. +This flag will disable writing those directives, +so the output is a document fragment with an implicit context. 
+This can be useful for writing output intended for humans. .It Cm expanded Write expanded URIs instead of prefixed names. .It Cm lax diff --git a/doc/man/serd-sort.1 b/doc/man/serd-sort.1 index 1484d67e..ede0dd58 100644 --- a/doc/man/serd-sort.1 +++ b/doc/man/serd-sort.1 @@ -66,6 +66,7 @@ Output syntax or option: .Cm TriG , .Cm Turtle , .Cm ascii , +.Cm contextual , .Cm expanded , .Cm verbatim , .Cm terse , diff --git a/include/serd/writer.h b/include/serd/writer.h index 5e8f5e6e..73f6fb0b 100644 --- a/include/serd/writer.h +++ b/include/serd/writer.h @@ -87,6 +87,15 @@ typedef enum { This disables the special "a" syntax in Turtle and TriG. */ SERD_WRITE_LONGHAND = 1U << 5U, + + /** + Suppress writing directives that describe the context. + + This writes data as usual, but suppresses writing `prefix` directives in + Turtle and TriG. The resulting output is a fragment of a document with + implicit context, so it will only be readable in a suitable environment. + */ + SERD_WRITE_CONTEXTUAL = 1U << 6U, } SerdWriterFlag; /// Bitwise OR of #SerdWriterFlag values diff --git a/src/writer.c b/src/writer.c index 30b8bb4e..7b821145 100644 --- a/src/writer.c +++ b/src/writer.c @@ -1434,6 +1434,10 @@ serd_writer_set_prefix(SerdWriter* writer, if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { TRY(st, terminate_context(writer)); + if (writer->flags & SERD_WRITE_CONTEXTUAL) { + return st; + } + TRY(st, esink("@prefix ", 8, writer)); TRY(st, esink(serd_node_string(name), name->length, writer)); TRY(st, esink(": <", 3, writer)); diff --git a/test/meson.build b/test/meson.build index 1f736afc..6bb0e033 100644 --- a/test/meson.build +++ b/test/meson.build @@ -19,6 +19,7 @@ simple_script_paths = [ 'run_filter_suite.py', 'run_suite.py', 'test_base.py', + 'test_contextual.py', 'test_empty.py', 'test_multifile.py', 'test_patterns.py', @@ -446,6 +447,16 @@ if is_variable('serd_pipe') suite: 'io', ) endif + + # Write options + + test( + 'contextual', 
files('test_contextual.py'), + args: pipe_script_args + files('../serd.ttl'), + env: test_env, + suite: ['tools', 'pipe', 'output'], + ) endif # Test specifics to serd-filter diff --git a/test/test_contextual.py b/test/test_contextual.py new file mode 100755 index 00000000..6a584c5b --- /dev/null +++ b/test/test_contextual.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 + +# Copyright 2022 David Robillard +# SPDX-License-Identifier: ISC + +"""Test writing with -O contextual (SERD_WRITE_CONTEXTUAL).""" + +# pylint: disable=consider-using-f-string + +import argparse +import sys +import shlex +import subprocess +import tempfile + +parser = argparse.ArgumentParser(description=__doc__) + +parser.add_argument("--tool", default="tools/serd-pipe", help="executable") +parser.add_argument("--wrapper", default="", help="executable wrapper") +parser.add_argument("input", default="", help="input file") + +args = parser.parse_args(sys.argv[1:]) +command = shlex.split(args.wrapper) + [ + args.tool, + "-O", + "turtle", + "-O", + "contextual", + args.input, +] + +DOC = "<{0}s> <{0}p> <{0}o> .".format("http://example.org/") + +with tempfile.TemporaryFile() as out: + proc = subprocess.run( + command, + check=False, + encoding="utf-8", + input=DOC, + stdout=out, + stderr=subprocess.PIPE, + ) + + assert proc.returncode == 0 + assert args.wrapper or len(proc.stderr) == 0 + + out.seek(0) + lines = out.readlines() + + for line in lines: + assert "@prefix" not in line.decode("utf-8") diff --git a/tools/console.c b/tools/console.c index 2a396af0..94d9a0c5 100644 --- a/tools/console.c +++ b/tools/console.c @@ -325,6 +325,7 @@ serd_set_output_option(const SerdStringView name, static const OutputOption output_options[] = { {"ascii", SERD_WRITE_ASCII}, + {"contextual", SERD_WRITE_CONTEXTUAL}, {"expanded", SERD_WRITE_EXPANDED}, {"lax", SERD_WRITE_LAX}, {"longhand", SERD_WRITE_LONGHAND}, diff --git a/tools/serd-filter.c b/tools/serd-filter.c index 70d7b68c..53424e62 100644 --- a/tools/serd-filter.c 
+++ b/tools/serd-filter.c @@ -213,7 +213,7 @@ print_usage(const char* const name, const bool error) " -I SYNTAX Input syntax turtle/ntriples/trig/nquads, or option\n" " lax/variables/relative/global/generated.\n" " -O SYNTAX Output syntax empty/turtle/ntriples/nquads, or option\n" - " ascii/expanded/verbatim/terse/lax.\n" + " ascii/contextual/expanded/verbatim/terse/lax.\n" " -V Display version information and exit.\n" " -f PATTERN_FILE Read pattern from PATTERN_FILE instead.\n" " -h Display this help and exit.\n" diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c index fb1586b4..569119fb 100644 --- a/tools/serd-pipe.c +++ b/tools/serd-pipe.c @@ -102,7 +102,7 @@ print_usage(const char* const name, const bool error) " -I SYNTAX Input syntax turtle/ntriples/trig/nquads, or option\n" " lax/variables/relative/global/generated.\n" " -O SYNTAX Output syntax empty/turtle/ntriples/nquads, or option\n" - " ascii/expanded/verbatim/terse/lax.\n" + " ascii/contextual/expanded/verbatim/terse/lax.\n" " -R ROOT_URI Keep relative URIs within ROOT_URI.\n" " -V Display version information and exit.\n" " -b BYTES I/O block size.\n" diff --git a/tools/serd-sort.c b/tools/serd-sort.c index 3b9c829a..75fb781c 100644 --- a/tools/serd-sort.c +++ b/tools/serd-sort.c @@ -172,7 +172,7 @@ print_usage(const char* const name, const bool error) " -I SYNTAX Input syntax turtle/ntriples/trig/nquads, or option\n" " lax/variables/relative/global/generated.\n" " -O SYNTAX Output syntax empty/turtle/ntriples/nquads, or option\n" - " ascii/expanded/verbatim/terse/lax.\n" + " ascii/contextual/expanded/verbatim/terse/lax.\n" " -V Display version information and exit.\n" " -b BYTES I/O block size.\n" " -c COLLATION An optional \"G\" then the letters \"SPO\" in any order.\n" -- cgit v1.2.1