about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2023-04-04 06:35:51 -0400
committer David Robillard <d@drobilla.net> 2023-12-02 18:49:07 -0500
commit 7219a81ad6cff840eb604983b3752d4f2097532a (patch)
tree 5c071ae9575f05a75f17a2d8280e9e9aed6440ac
parent 1c7a8efa90003d21e19b47bbad185390cea5963c (diff)
download serd-7219a81ad6cff840eb604983b3752d4f2097532a.tar.gz
serd-7219a81ad6cff840eb604983b3752d4f2097532a.tar.bz2
serd-7219a81ad6cff840eb604983b3752d4f2097532a.zip
Add support for writing terse output with minimal newlines
-rw-r--r-- NEWS 1
-rw-r--r-- doc/man/serdi.1 4
-rw-r--r-- include/serd/statement.h 2
-rw-r--r-- include/serd/writer.h 1
-rw-r--r-- src/serdi.c 3
-rw-r--r-- src/writer.c 57
-rw-r--r-- test/extra/abbreviate/collapse-graphs.trig 1
-rw-r--r-- test/extra/terse/blank_object.ttl 2
-rw-r--r-- test/extra/terse/blank_subject.ttl 2
-rw-r--r-- test/extra/terse/collection_object.ttl 2
-rw-r--r-- test/extra/terse/collection_subject.ttl 2
-rw-r--r-- test/extra/terse/manifest.ttl 39
-rw-r--r-- test/extra/terse/nil_object.ttl 2
-rw-r--r-- test/extra/terse/nil_subject.ttl 2
-rw-r--r-- test/meson.build 8
-rw-r--r-- test/serd_test_util/__init__.py 2
-rw-r--r-- test/test_terse_write.c 112
17 files changed, 225 insertions, 17 deletions
diff --git a/NEWS b/NEWS
index 01a292fa..baaeb8a0 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,7 @@ serd (1.1.1) unstable; urgency=medium
* Add SerdBuffer for mutable buffers to keep SerdChunk const-correct
* Add SerdWorld for shared library state
+ * Add support for writing terse output with minimal newlines
* Add support for xsd:float and xsd:double literals
* Bring read/write interface closer to C standard
* Make nodes opaque
diff --git a/doc/man/serdi.1 b/doc/man/serdi.1
index 24a4ad46..f03fe23d 100644
--- a/doc/man/serdi.1
+++ b/doc/man/serdi.1
@@ -8,7 +8,7 @@
.Nd read and write RDF syntax
.Sh SYNOPSIS
.Nm serdi
-.Op Fl abefhlqv
+.Op Fl abefhlqtv
.Op Fl c Ar prefix
.Op Fl i Ar syntax
.Op Fl k Ar bytes
@@ -137,6 +137,8 @@ This is useful for keeping relative references within some directory.
Parse
.Ar string
input instead of a file (terminates options).
+.It Fl t
+Write terser output without newlines.
.It Fl v
Display version information and exit.
.El
diff --git a/include/serd/statement.h b/include/serd/statement.h
index aee632cb..a1932796 100644
--- a/include/serd/statement.h
+++ b/include/serd/statement.h
@@ -35,6 +35,8 @@ typedef enum {
SERD_ANON_O = 1U << 3U, ///< Start of anonymous object
SERD_LIST_S = 1U << 4U, ///< Start of list subject
SERD_LIST_O = 1U << 5U, ///< Start of list object
+ SERD_TERSE_S = 1U << 6U, ///< Start of terse subject
+ SERD_TERSE_O = 1U << 7U, ///< Start of terse object
} SerdStatementFlag;
/// Bitwise OR of SerdStatementFlag values
diff --git a/include/serd/writer.h b/include/serd/writer.h
index db402fea..36880d2a 100644
--- a/include/serd/writer.h
+++ b/include/serd/writer.h
@@ -39,6 +39,7 @@ typedef enum {
SERD_WRITE_UNRESOLVED = 1U << 2U, ///< Do not make URIs relative
SERD_WRITE_BULK = 1U << 3U, ///< Write output in pages
SERD_WRITE_STRICT = 1U << 4U, ///< Abort with error on lossy output
+ SERD_WRITE_TERSE = 1U << 5U, ///< Write terser output without newlines
} SerdWriterFlag;
/// Bitwise OR of #SerdWriterFlag values
diff --git a/src/serdi.c b/src/serdi.c
index 01e2e764..59c66cae 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -68,6 +68,7 @@ print_usage(const char* const name, const bool error)
" -q Suppress all output except data.\n"
" -r ROOT_URI Keep relative URIs within ROOT_URI.\n"
" -s INPUT Parse INPUT as string (terminates options).\n"
+ " -t Write terser output without newlines.\n"
" -v Display version information and exit.\n";
FILE* const os = error ? stderr : stdout;
@@ -142,6 +143,8 @@ main(int argc, char** argv)
writer_flags &= ~(SerdWriterFlags)SERD_WRITE_STRICT;
} else if (opt == 'q') {
quiet = true;
+ } else if (opt == 't') {
+ writer_flags |= SERD_WRITE_TERSE;
} else if (opt == 'v') {
return print_version();
} else if (opt == 's') {
diff --git a/src/writer.c b/src/writer.c
index 19ce926b..3079cebc 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -73,8 +73,11 @@ typedef enum {
SEP_ANON_S_P, ///< Between anonymous subject and predicate (whitespace)
SEP_ANON_END, ///< End of anonymous node (']')
SEP_LIST_BEGIN, ///< Start of list ('(')
- SEP_LIST_SEP, ///< List separator (whitespace)
+ SEP_LIST_SEP, ///< List separator (newline)
SEP_LIST_END, ///< End of list (')')
+ SEP_TLIST_BEGIN, ///< Start of terse list ('(')
+ SEP_TLIST_SEP, ///< Terse list separator (space)
+ SEP_TLIST_END, ///< End of terse list (')')
SEP_GRAPH_BEGIN, ///< Start of graph ('{')
SEP_GRAPH_END, ///< End of graph ('}')
} Sep;
@@ -97,7 +100,7 @@ static const SepRule rules[] = {
{NIL, +0, SEP_NONE, SEP_NONE, SEP_NONE},
{NIL, +0, SEP_NONE, SEP_NONE, SEP_NONE},
{'\n', 0, SEP_NONE, SEP_NONE, SEP_NONE},
- {'.', +0, SEP_EACH, SEP_NONE, SEP_EACH},
+ {'.', +0, SEP_EACH, SEP_NONE, SEP_NONE},
{'.', +0, SEP_EACH, SEP_NONE, SEP_NONE},
{';', +0, SEP_EACH, SEP_NONE, SEP_EACH},
{',', +0, SEP_EACH, SEP_NONE, SEP_EACH},
@@ -106,12 +109,15 @@ static const SepRule rules[] = {
{',', +0, SEP_EACH, SEP_NONE, SEP_NONE},
{NIL, +1, SEP_NONE, SEP_NONE, SEP_EACH},
{' ', +0, SEP_NONE, SEP_NONE, SEP_NONE},
- {'[', +1, M(SEP_JOIN_O_AA), SEP_NONE, SEP_NONE},
+ {'[', +1, M(SEP_JOIN_O_AA), M(SEP_TLIST_BEGIN) | M(SEP_TLIST_SEP), SEP_NONE},
{NIL, +1, SEP_NONE, SEP_NONE, M(SEP_ANON_BEGIN)},
{']', -1, SEP_NONE, ~M(SEP_ANON_BEGIN), SEP_NONE},
{'(', +1, M(SEP_JOIN_O_AA), SEP_NONE, SEP_EACH},
{NIL, +0, SEP_NONE, SEP_EACH, SEP_NONE},
{')', -1, SEP_NONE, SEP_EACH, SEP_NONE},
+ {'(', +1, SEP_NONE, SEP_NONE, SEP_NONE},
+ {NIL, +0, SEP_EACH, SEP_NONE, SEP_NONE},
+ {')', -1, SEP_NONE, SEP_NONE, SEP_NONE},
{'{', +1, SEP_EACH, SEP_NONE, SEP_EACH},
{'}', -1, SEP_NONE, SEP_NONE, SEP_EACH},
};
@@ -582,10 +588,14 @@ uri_sink(const void* buf, size_t size, size_t nmemb, void* stream)
}
SERD_NODISCARD static SerdStatus
-write_newline(SerdWriter* writer)
+write_newline(SerdWriter* writer, bool terse)
{
SerdStatus st = SERD_SUCCESS;
+ if (terse || (writer->flags & SERD_WRITE_TERSE)) {
+ return esink(" ", 1, writer);
+ }
+
TRY(st, esink("\n", 1, writer));
for (int i = 0; i < writer->indent; ++i) {
TRY(st, esink("\t", 1, writer));
@@ -595,16 +605,29 @@ write_newline(SerdWriter* writer)
}
SERD_NODISCARD static SerdStatus
-write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep)
+write_top_level_sep(SerdWriter* writer)
{
- (void)flags;
+ return (writer->last_sep && !(writer->flags & SERD_WRITE_TERSE))
+ ? write_newline(writer, false)
+ : SERD_SUCCESS;
+}
+SERD_NODISCARD static SerdStatus
+write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep)
+{
SerdStatus st = SERD_SUCCESS;
const SepRule* const rule = &rules[sep];
const bool pre_line = (rule->pre_line_after & (1U << writer->last_sep));
const bool post_line = (rule->post_line_after & (1U << writer->last_sep));
+ const bool terse = (((flags & SERD_TERSE_S) && (flags & SERD_LIST_S)) ||
+ ((flags & SERD_TERSE_O) && (flags & SERD_LIST_O)));
+
+ if (terse && sep >= SEP_LIST_BEGIN && sep <= SEP_LIST_END) {
+ sep = (Sep)((int)sep + 3); // Switch to corresponding terse separator
+ }
+
// Adjust indent, but tolerate if it would become negative
if (rule->indent && (pre_line || post_line)) {
writer->indent = ((rule->indent >= 0 || writer->indent >= -rule->indent)
@@ -620,7 +643,7 @@ write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep)
// Write newline or space before separator if necessary
if (pre_line) {
- TRY(st, write_newline(writer));
+ TRY(st, write_newline(writer, terse));
} else if (rule->pre_space_after & (1U << writer->last_sep)) {
TRY(st, esink(" ", 1, writer));
}
@@ -632,18 +655,20 @@ write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep)
// Write newline after separator if necessary
if (post_line) {
- TRY(st, write_newline(writer));
+ TRY(st, write_newline(writer, terse));
if (rule->post_line_after != ~(SepMask)0U) {
writer->last_sep = SEP_NEWLINE;
}
}
// Reset context and write a blank line after ends of subjects
- if (sep == SEP_END_S) {
+ if (sep == SEP_END_S || sep == SEP_END_DIRECT) {
writer->indent = ctx(writer, SERD_GRAPH) ? 1 : 0;
writer->context.predicates = false;
writer->context.comma_indented = false;
- TRY(st, esink("\n", 1, writer));
+ if (!terse) {
+ TRY(st, esink("\n", 1, writer));
+ }
}
writer->last_sep = sep;
@@ -987,8 +1012,10 @@ serd_writer_write_statement(SerdWriter* const writer,
const SerdNode* const graph = serd_statement_graph(statement);
if (!is_resource(subject) || !is_resource(predicate) || !object ||
- ((flags & SERD_ANON_S) && (flags & SERD_LIST_S)) ||
- ((flags & SERD_ANON_O) && (flags & SERD_LIST_O))) {
+ ((flags & SERD_ANON_S) && (flags & SERD_LIST_S)) || // Nonsense
+ ((flags & SERD_ANON_O) && (flags & SERD_LIST_O)) || // Nonsense
+ ((flags & SERD_ANON_S) && (flags & SERD_TERSE_S)) || // Unsupported
+ ((flags & SERD_ANON_O) && (flags & SERD_TERSE_O))) { // Unsupported
return SERD_BAD_ARG;
}
@@ -1013,7 +1040,7 @@ serd_writer_write_statement(SerdWriter* const writer,
TRY(st, terminate_context(writer));
reset_context(writer, RESET_GRAPH | RESET_INDENT);
if (graph) {
- TRY(st, write_newline(writer));
+ TRY(st, write_top_level_sep(writer));
TRY(st, write_node(writer, graph, SERD_GRAPH, flags));
TRY(st, write_sep(writer, flags, SEP_GRAPH_BEGIN));
serd_node_set(&writer->context.graph, graph);
@@ -1072,7 +1099,7 @@ serd_writer_write_statement(SerdWriter* const writer,
}
if (writer->last_sep == SEP_END_S || writer->last_sep == SEP_END_DIRECT) {
- TRY(st, write_newline(writer));
+ TRY(st, write_top_level_sep(writer));
}
TRY(st, write_node(writer, subject, SERD_SUBJECT, flags));
@@ -1172,8 +1199,10 @@ serd_writer_finish(SerdWriter* writer)
{
const SerdStatus st0 = terminate_context(writer);
const SerdStatus st1 = serd_byte_sink_flush(&writer->byte_sink);
+
free_anon_stack(writer);
reset_context(writer, RESET_GRAPH | RESET_INDENT);
+ writer->last_sep = SEP_NONE;
return st0 ? st0 : st1;
}
diff --git a/test/extra/abbreviate/collapse-graphs.trig b/test/extra/abbreviate/collapse-graphs.trig
index 5bdf55ce..4f8e4344 100644
--- a/test/extra/abbreviate/collapse-graphs.trig
+++ b/test/extra/abbreviate/collapse-graphs.trig
@@ -1,4 +1,3 @@
-
<http://example.org/g1> {
<http://example.org/s1>
<http://example.org/p1> <http://example.org/o1> ;
diff --git a/test/extra/terse/blank_object.ttl b/test/extra/terse/blank_object.ttl
new file mode 100644
index 00000000..270e406b
--- /dev/null
+++ b/test/extra/terse/blank_object.ttl
@@ -0,0 +1,2 @@
+@prefix eg: <http://example.org/> .
+eg:s eg:p1 [ eg:p11 1 ; eg:p12 2 ] ; eg:p2 [ eg:p23 3 ; eg:p24 4 ] .
diff --git a/test/extra/terse/blank_subject.ttl b/test/extra/terse/blank_subject.ttl
new file mode 100644
index 00000000..5e3303f9
--- /dev/null
+++ b/test/extra/terse/blank_subject.ttl
@@ -0,0 +1,2 @@
+@prefix eg: <http://example.org/> .
+[] eg:p1 1 ; eg:p2 2 .
diff --git a/test/extra/terse/collection_object.ttl b/test/extra/terse/collection_object.ttl
new file mode 100644
index 00000000..3310c418
--- /dev/null
+++ b/test/extra/terse/collection_object.ttl
@@ -0,0 +1,2 @@
+@prefix eg: <http://example.org/> .
+eg:s eg:p1 ( 1 2 ) ; eg:p2 ( 3 4 ) .
diff --git a/test/extra/terse/collection_subject.ttl b/test/extra/terse/collection_subject.ttl
new file mode 100644
index 00000000..43670620
--- /dev/null
+++ b/test/extra/terse/collection_subject.ttl
@@ -0,0 +1,2 @@
+@prefix eg: <http://example.org/> .
+( 1 2 ) eg:p3 3 ; eg:p4 4 .
diff --git a/test/extra/terse/manifest.ttl b/test/extra/terse/manifest.ttl
new file mode 100644
index 00000000..6773b97d
--- /dev/null
+++ b/test/extra/terse/manifest.ttl
@@ -0,0 +1,39 @@
+@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix rdft: <http://www.w3.org/ns/rdftest#> .
+
+<>
+ a mf:Manifest ;
+ rdfs:comment "Serd terse writing test suite" ;
+ mf:entries (
+ <#blank_object>
+ <#blank_subject>
+ <#collection_object>
+ <#collection_subject>
+ <#nil_object>
+ <#nil_subject>
+ ) .
+
+<#blank_object>
+ a rdft:TestTurtleEval ;
+ mf:action <blank_object.ttl> ;
+ mf:name "blank_object" ;
+ mf:result <blank_object.ttl> .
+
+<#blank_subject>
+ a rdft:TestTurtleEval ;
+ mf:action <blank_subject.ttl> ;
+ mf:name "blank_subject" ;
+ mf:result <blank_subject.ttl> .
+
+<#collection_object>
+ a rdft:TestTurtleEval ;
+ mf:action <collection_object.ttl> ;
+ mf:name "collection_object" ;
+ mf:result <collection_object.ttl> .
+
+<#collection_subject>
+ a rdft:TestTurtleEval ;
+ mf:action <collection_subject.ttl> ;
+ mf:name "collection_subject" ;
+ mf:result <collection_subject.ttl> .
diff --git a/test/extra/terse/nil_object.ttl b/test/extra/terse/nil_object.ttl
new file mode 100644
index 00000000..4a0f93bd
--- /dev/null
+++ b/test/extra/terse/nil_object.ttl
@@ -0,0 +1,2 @@
+@prefix eg: <http://example.org/> .
+eg:s eg:p1 () ; eg:p2 [] , () .
diff --git a/test/extra/terse/nil_subject.ttl b/test/extra/terse/nil_subject.ttl
new file mode 100644
index 00000000..a08f7d82
--- /dev/null
+++ b/test/extra/terse/nil_subject.ttl
@@ -0,0 +1,2 @@
+@prefix eg: <http://example.org/> .
+() eg:p1 1 ; eg:p2 2 .
diff --git a/test/meson.build b/test/meson.build
index 937b9038..2ed73e63 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -34,6 +34,7 @@ ttl_metadata_file_paths = [
'extra/pretty/manifest.ttl',
'extra/qualify/manifest.ttl',
'extra/root/manifest.ttl',
+ 'extra/terse/manifest.ttl',
]
plot_scripts = files(plot_script_paths)
@@ -129,6 +130,7 @@ unit_tests = [
'statement',
'string',
'syntax',
+ 'terse_write',
'uri',
'world',
'writer',
@@ -407,6 +409,12 @@ test_suites = {
'--',
['-r', 'http://example.org/top/root/'],
],
+ 'terse': [
+ files('extra/terse/manifest.ttl'),
+ ns_serdtest + 'terse/',
+ '--',
+ '-t',
+ ],
}
# Run every test suite with serdi
diff --git a/test/serd_test_util/__init__.py b/test/serd_test_util/__init__.py
index 8027462b..04876f98 100644
--- a/test/serd_test_util/__init__.py
+++ b/test/serd_test_util/__init__.py
@@ -60,7 +60,7 @@ def print_result_summary(results):
failed, total = (results.n_failures, results.n_tests)
if failed == 0:
- sys.stdout.write("All {} tests passed\n".format(total))
+ print("All {} tests passed".format(total))
else:
error("{}/{} tests failed".format(failed, total))
diff --git a/test/test_terse_write.c b/test/test_terse_write.c
new file mode 100644
index 00000000..277f025e
--- /dev/null
+++ b/test/test_terse_write.c
@@ -0,0 +1,112 @@
+// Copyright 2019-2020 David Robillard <d@drobilla.net>
+// SPDX-License-Identifier: ISC
+
+#undef NDEBUG
+
+#include "serd/buffer.h"
+#include "serd/env.h"
+#include "serd/node.h"
+#include "serd/sink.h"
+#include "serd/statement.h"
+#include "serd/stream.h"
+#include "serd/string_view.h"
+#include "serd/syntax.h"
+#include "serd/world.h"
+#include "serd/writer.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+
+static void
+check_output(SerdWriter* writer, SerdBuffer* buffer, const char* expected)
+{
+ serd_writer_finish(writer);
+ serd_buffer_sink_finish(buffer);
+
+ const char* output = (const char*)buffer->buf;
+
+ assert(!strcmp(output, expected));
+
+ buffer->len = 0;
+}
+
+static int
+test(void)
+{
+ SerdBuffer buffer = {NULL, 0};
+ SerdWorld* world = serd_world_new();
+ SerdEnv* env = serd_env_new(serd_empty_string());
+
+ SerdNode* b1 = serd_new_blank(serd_string("b1"));
+ SerdNode* l1 = serd_new_blank(serd_string("l1"));
+ SerdNode* l2 = serd_new_blank(serd_string("l2"));
+ SerdNode* s1 = serd_new_string(serd_string("s1"));
+ SerdNode* s2 = serd_new_string(serd_string("s2"));
+
+ SerdNode* rdf_first = serd_new_uri(serd_string(NS_RDF "first"));
+ SerdNode* rdf_value = serd_new_uri(serd_string(NS_RDF "value"));
+ SerdNode* rdf_rest = serd_new_uri(serd_string(NS_RDF "rest"));
+ SerdNode* rdf_nil = serd_new_uri(serd_string(NS_RDF "nil"));
+
+ serd_env_set_prefix(env, serd_string("rdf"), serd_string(NS_RDF));
+
+ SerdWriter* writer = serd_writer_new(
+ world, SERD_TURTLE, 0, env, (SerdWriteFunc)serd_buffer_sink, &buffer);
+
+ const SerdSink* sink = serd_writer_sink(writer);
+
+ // Simple lone list
+ serd_sink_write(sink, SERD_TERSE_S | SERD_LIST_S, l1, rdf_first, s1, NULL);
+ serd_sink_write(sink, 0, l1, rdf_rest, l2, NULL);
+ serd_sink_write(sink, 0, l2, rdf_first, s2, NULL);
+ serd_sink_write(sink, 0, l2, rdf_rest, rdf_nil, NULL);
+ check_output(writer, &buffer, "( \"s1\" \"s2\" ) .\n");
+
+ // Nested terse lists
+ serd_sink_write(sink,
+ SERD_TERSE_S | SERD_LIST_S | SERD_TERSE_O | SERD_LIST_O,
+ l1,
+ rdf_first,
+ l2,
+ NULL);
+ serd_sink_write(sink, 0, l2, rdf_first, s1, NULL);
+ serd_sink_write(sink, 0, l1, rdf_rest, rdf_nil, NULL);
+ serd_sink_write(sink, 0, l2, rdf_rest, rdf_nil, NULL);
+ check_output(writer, &buffer, "( ( \"s1\" ) ) .\n");
+
+ // List as object
+ serd_sink_write(
+ sink, SERD_EMPTY_S | SERD_LIST_O | SERD_TERSE_O, b1, rdf_value, l1, NULL);
+ serd_sink_write(sink, 0, l1, rdf_first, s1, NULL);
+ serd_sink_write(sink, 0, l1, rdf_rest, l2, NULL);
+ serd_sink_write(sink, 0, l2, rdf_first, s2, NULL);
+ serd_sink_write(sink, 0, l2, rdf_rest, rdf_nil, NULL);
+ check_output(writer, &buffer, "[] rdf:value ( \"s1\" \"s2\" ) .\n");
+
+ serd_buffer_sink_finish(&buffer);
+ serd_writer_free(writer);
+ serd_node_free(rdf_nil);
+ serd_node_free(rdf_rest);
+ serd_node_free(rdf_value);
+ serd_node_free(rdf_first);
+ serd_node_free(s2);
+ serd_node_free(s1);
+ serd_node_free(l2);
+ serd_node_free(l1);
+ serd_node_free(b1);
+ serd_env_free(env);
+ serd_world_free(world);
+ free(buffer.buf);
+
+ return 0;
+}
+
+int
+main(void)
+{
+ return test();
+}