diff options
author | David Robillard <d@drobilla.net> | 2023-04-04 06:35:51 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:07 -0500 |
commit | 7219a81ad6cff840eb604983b3752d4f2097532a (patch) | |
tree | 5c071ae9575f05a75f17a2d8280e9e9aed6440ac | |
parent | 1c7a8efa90003d21e19b47bbad185390cea5963c (diff) | |
download | serd-7219a81ad6cff840eb604983b3752d4f2097532a.tar.gz serd-7219a81ad6cff840eb604983b3752d4f2097532a.tar.bz2 serd-7219a81ad6cff840eb604983b3752d4f2097532a.zip |
Add support for writing terse output with minimal newlines
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | doc/man/serdi.1 | 4 | ||||
-rw-r--r-- | include/serd/statement.h | 2 | ||||
-rw-r--r-- | include/serd/writer.h | 1 | ||||
-rw-r--r-- | src/serdi.c | 3 | ||||
-rw-r--r-- | src/writer.c | 57 | ||||
-rw-r--r-- | test/extra/abbreviate/collapse-graphs.trig | 1 | ||||
-rw-r--r-- | test/extra/terse/blank_object.ttl | 2 | ||||
-rw-r--r-- | test/extra/terse/blank_subject.ttl | 2 | ||||
-rw-r--r-- | test/extra/terse/collection_object.ttl | 2 | ||||
-rw-r--r-- | test/extra/terse/collection_subject.ttl | 2 | ||||
-rw-r--r-- | test/extra/terse/manifest.ttl | 39 | ||||
-rw-r--r-- | test/extra/terse/nil_object.ttl | 2 | ||||
-rw-r--r-- | test/extra/terse/nil_subject.ttl | 2 | ||||
-rw-r--r-- | test/meson.build | 8 | ||||
-rw-r--r-- | test/serd_test_util/__init__.py | 2 | ||||
-rw-r--r-- | test/test_terse_write.c | 112 |
17 files changed, 225 insertions, 17 deletions
@@ -2,6 +2,7 @@ serd (1.1.1) unstable; urgency=medium * Add SerdBuffer for mutable buffers to keep SerdChunk const-correct * Add SerdWorld for shared library state + * Add support for writing terse output with minimal newlines * Add support for xsd:float and xsd:double literals * Bring read/write interface closer to C standard * Make nodes opaque diff --git a/doc/man/serdi.1 b/doc/man/serdi.1 index 24a4ad46..f03fe23d 100644 --- a/doc/man/serdi.1 +++ b/doc/man/serdi.1 @@ -8,7 +8,7 @@ .Nd read and write RDF syntax .Sh SYNOPSIS .Nm serdi -.Op Fl abefhlqv +.Op Fl abefhlqtv .Op Fl c Ar prefix .Op Fl i Ar syntax .Op Fl k Ar bytes @@ -137,6 +137,8 @@ This is useful for keeping relative references within some directory. Parse .Ar string input instead of a file (terminates options). +.It Fl t +Write terser output without newlines. .It Fl v Display version information and exit. .El diff --git a/include/serd/statement.h b/include/serd/statement.h index aee632cb..a1932796 100644 --- a/include/serd/statement.h +++ b/include/serd/statement.h @@ -35,6 +35,8 @@ typedef enum { SERD_ANON_O = 1U << 3U, ///< Start of anonymous object SERD_LIST_S = 1U << 4U, ///< Start of list subject SERD_LIST_O = 1U << 5U, ///< Start of list object + SERD_TERSE_S = 1U << 6U, ///< Start of terse subject + SERD_TERSE_O = 1U << 7U, ///< Start of terse object } SerdStatementFlag; /// Bitwise OR of SerdStatementFlag values diff --git a/include/serd/writer.h b/include/serd/writer.h index db402fea..36880d2a 100644 --- a/include/serd/writer.h +++ b/include/serd/writer.h @@ -39,6 +39,7 @@ typedef enum { SERD_WRITE_UNRESOLVED = 1U << 2U, ///< Do not make URIs relative SERD_WRITE_BULK = 1U << 3U, ///< Write output in pages SERD_WRITE_STRICT = 1U << 4U, ///< Abort with error on lossy output + SERD_WRITE_TERSE = 1U << 5U, ///< Write terser output without newlines } SerdWriterFlag; /// Bitwise OR of #SerdWriterFlag values diff --git a/src/serdi.c b/src/serdi.c index 01e2e764..59c66cae 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -68,6 +68,7 @@ print_usage(const char* const name, const bool error) " -q Suppress all output except data.\n" " -r ROOT_URI Keep relative URIs within ROOT_URI.\n" " -s INPUT Parse INPUT as string (terminates options).\n" + " -t Write terser output without newlines.\n" " -v Display version information and exit.\n"; FILE* const os = error ? stderr : stdout; @@ -142,6 +143,8 @@ main(int argc, char** argv) writer_flags &= ~(SerdWriterFlags)SERD_WRITE_STRICT; } else if (opt == 'q') { quiet = true; + } else if (opt == 't') { + writer_flags |= SERD_WRITE_TERSE; } else if (opt == 'v') { return print_version(); } else if (opt == 's') { diff --git a/src/writer.c b/src/writer.c index 19ce926b..3079cebc 100644 --- a/src/writer.c +++ b/src/writer.c @@ -73,8 +73,11 @@ typedef enum { SEP_ANON_S_P, ///< Between anonymous subject and predicate (whitespace) SEP_ANON_END, ///< End of anonymous node (']') SEP_LIST_BEGIN, ///< Start of list ('(') - SEP_LIST_SEP, ///< List separator (whitespace) + SEP_LIST_SEP, ///< List separator (newline) SEP_LIST_END, ///< End of list (')') + SEP_TLIST_BEGIN, ///< Start of terse list ('(') + SEP_TLIST_SEP, ///< Terse list separator (space) + SEP_TLIST_END, ///< End of terse list (')') SEP_GRAPH_BEGIN, ///< Start of graph ('{') SEP_GRAPH_END, ///< End of graph ('}') } Sep; @@ -97,7 +100,7 @@ static const SepRule rules[] = { {NIL, +0, SEP_NONE, SEP_NONE, SEP_NONE}, {NIL, +0, SEP_NONE, SEP_NONE, SEP_NONE}, {'\n', 0, SEP_NONE, SEP_NONE, SEP_NONE}, - {'.', +0, SEP_EACH, SEP_NONE, SEP_EACH}, + {'.', +0, SEP_EACH, SEP_NONE, SEP_NONE}, {'.', +0, SEP_EACH, SEP_NONE, SEP_NONE}, {';', +0, SEP_EACH, SEP_NONE, SEP_EACH}, {',', +0, SEP_EACH, SEP_NONE, SEP_EACH}, @@ -106,12 +109,15 @@ static const SepRule rules[] = { {',', +0, SEP_EACH, SEP_NONE, SEP_NONE}, {NIL, +1, SEP_NONE, SEP_NONE, SEP_EACH}, {' ', +0, SEP_NONE, SEP_NONE, SEP_NONE}, - {'[', +1, M(SEP_JOIN_O_AA), SEP_NONE, SEP_NONE}, + {'[', +1, M(SEP_JOIN_O_AA), M(SEP_TLIST_BEGIN) | M(SEP_TLIST_SEP), SEP_NONE}, {NIL, +1, SEP_NONE, SEP_NONE, M(SEP_ANON_BEGIN)}, {']', -1, SEP_NONE, ~M(SEP_ANON_BEGIN), SEP_NONE}, {'(', +1, M(SEP_JOIN_O_AA), SEP_NONE, SEP_EACH}, {NIL, +0, SEP_NONE, SEP_EACH, SEP_NONE}, {')', -1, SEP_NONE, SEP_EACH, SEP_NONE}, + {'(', +1, SEP_NONE, SEP_NONE, SEP_NONE}, + {NIL, +0, SEP_EACH, SEP_NONE, SEP_NONE}, + {')', -1, SEP_NONE, SEP_NONE, SEP_NONE}, {'{', +1, SEP_EACH, SEP_NONE, SEP_EACH}, {'}', -1, SEP_NONE, SEP_NONE, SEP_EACH}, }; @@ -582,10 +588,14 @@ uri_sink(const void* buf, size_t size, size_t nmemb, void* stream) } SERD_NODISCARD static SerdStatus -write_newline(SerdWriter* writer) +write_newline(SerdWriter* writer, bool terse) { SerdStatus st = SERD_SUCCESS; + if (terse || (writer->flags & SERD_WRITE_TERSE)) { + return esink(" ", 1, writer); + } + TRY(st, esink("\n", 1, writer)); for (int i = 0; i < writer->indent; ++i) { TRY(st, esink("\t", 1, writer)); @@ -595,16 +605,29 @@ write_newline(SerdWriter* writer) } SERD_NODISCARD static SerdStatus -write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep) +write_top_level_sep(SerdWriter* writer) { - (void)flags; + return (writer->last_sep && !(writer->flags & SERD_WRITE_TERSE)) + ? write_newline(writer, false) + : SERD_SUCCESS; +} +SERD_NODISCARD static SerdStatus +write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep) +{ SerdStatus st = SERD_SUCCESS; const SepRule* const rule = &rules[sep]; const bool pre_line = (rule->pre_line_after & (1U << writer->last_sep)); const bool post_line = (rule->post_line_after & (1U << writer->last_sep)); + const bool terse = (((flags & SERD_TERSE_S) && (flags & SERD_LIST_S)) || + ((flags & SERD_TERSE_O) && (flags & SERD_LIST_O))); + + if (terse && sep >= SEP_LIST_BEGIN && sep <= SEP_LIST_END) { + sep = (Sep)((int)sep + 3); // Switch to corresponding terse separator + } + // Adjust indent, but tolerate if it would become negative if (rule->indent && (pre_line || post_line)) { writer->indent = ((rule->indent >= 0 || writer->indent >= -rule->indent) @@ -620,7 +643,7 @@ write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep) // Write newline or space before separator if necessary if (pre_line) { - TRY(st, write_newline(writer)); + TRY(st, write_newline(writer, terse)); } else if (rule->pre_space_after & (1U << writer->last_sep)) { TRY(st, esink(" ", 1, writer)); } @@ -632,18 +655,20 @@ write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep) // Write newline after separator if necessary if (post_line) { - TRY(st, write_newline(writer)); + TRY(st, write_newline(writer, terse)); if (rule->post_line_after != ~(SepMask)0U) { writer->last_sep = SEP_NEWLINE; } } // Reset context and write a blank line after ends of subjects - if (sep == SEP_END_S) { + if (sep == SEP_END_S || sep == SEP_END_DIRECT) { writer->indent = ctx(writer, SERD_GRAPH) ? 1 : 0; writer->context.predicates = false; writer->context.comma_indented = false; - TRY(st, esink("\n", 1, writer)); + if (!terse) { + TRY(st, esink("\n", 1, writer)); + } } writer->last_sep = sep; @@ -987,8 +1012,10 @@ serd_writer_write_statement(SerdWriter* const writer, const SerdNode* const graph = serd_statement_graph(statement); if (!is_resource(subject) || !is_resource(predicate) || !object || - ((flags & SERD_ANON_S) && (flags & SERD_LIST_S)) || - ((flags & SERD_ANON_O) && (flags & SERD_LIST_O))) { + ((flags & SERD_ANON_S) && (flags & SERD_LIST_S)) || // Nonsense + ((flags & SERD_ANON_O) && (flags & SERD_LIST_O)) || // Nonsense + ((flags & SERD_ANON_S) && (flags & SERD_TERSE_S)) || // Unsupported + ((flags & SERD_ANON_O) && (flags & SERD_TERSE_O))) { // Unsupported return SERD_BAD_ARG; } @@ -1013,7 +1040,7 @@ serd_writer_write_statement(SerdWriter* const writer, TRY(st, terminate_context(writer)); reset_context(writer, RESET_GRAPH | RESET_INDENT); if (graph) { - TRY(st, write_newline(writer)); + TRY(st, write_top_level_sep(writer)); TRY(st, write_node(writer, graph, SERD_GRAPH, flags)); TRY(st, write_sep(writer, flags, SEP_GRAPH_BEGIN)); serd_node_set(&writer->context.graph, graph); @@ -1072,7 +1099,7 @@ serd_writer_write_statement(SerdWriter* const writer, } if (writer->last_sep == SEP_END_S || writer->last_sep == SEP_END_DIRECT) { - TRY(st, write_newline(writer)); + TRY(st, write_top_level_sep(writer)); } TRY(st, write_node(writer, subject, SERD_SUBJECT, flags)); @@ -1172,8 +1199,10 @@ serd_writer_finish(SerdWriter* writer) { const SerdStatus st0 = terminate_context(writer); const SerdStatus st1 = serd_byte_sink_flush(&writer->byte_sink); + free_anon_stack(writer); reset_context(writer, RESET_GRAPH | RESET_INDENT); + writer->last_sep = SEP_NONE; return st0 ? st0 : st1; } diff --git a/test/extra/abbreviate/collapse-graphs.trig b/test/extra/abbreviate/collapse-graphs.trig index 5bdf55ce..4f8e4344 100644 --- a/test/extra/abbreviate/collapse-graphs.trig +++ b/test/extra/abbreviate/collapse-graphs.trig @@ -1,4 +1,3 @@ - <http://example.org/g1> { <http://example.org/s1> <http://example.org/p1> <http://example.org/o1> ; diff --git a/test/extra/terse/blank_object.ttl b/test/extra/terse/blank_object.ttl new file mode 100644 index 00000000..270e406b --- /dev/null +++ b/test/extra/terse/blank_object.ttl @@ -0,0 +1,2 @@ +@prefix eg: <http://example.org/> . +eg:s eg:p1 [ eg:p11 1 ; eg:p12 2 ] ; eg:p2 [ eg:p23 3 ; eg:p24 4 ] . diff --git a/test/extra/terse/blank_subject.ttl b/test/extra/terse/blank_subject.ttl new file mode 100644 index 00000000..5e3303f9 --- /dev/null +++ b/test/extra/terse/blank_subject.ttl @@ -0,0 +1,2 @@ +@prefix eg: <http://example.org/> . +[] eg:p1 1 ; eg:p2 2 . diff --git a/test/extra/terse/collection_object.ttl b/test/extra/terse/collection_object.ttl new file mode 100644 index 00000000..3310c418 --- /dev/null +++ b/test/extra/terse/collection_object.ttl @@ -0,0 +1,2 @@ +@prefix eg: <http://example.org/> . +eg:s eg:p1 ( 1 2 ) ; eg:p2 ( 3 4 ) . diff --git a/test/extra/terse/collection_subject.ttl b/test/extra/terse/collection_subject.ttl new file mode 100644 index 00000000..43670620 --- /dev/null +++ b/test/extra/terse/collection_subject.ttl @@ -0,0 +1,2 @@ +@prefix eg: <http://example.org/> . +( 1 2 ) eg:p3 3 ; eg:p4 4 . diff --git a/test/extra/terse/manifest.ttl b/test/extra/terse/manifest.ttl new file mode 100644 index 00000000..6773b97d --- /dev/null +++ b/test/extra/terse/manifest.ttl @@ -0,0 +1,39 @@ +@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix rdft: <http://www.w3.org/ns/rdftest#> . + +<> + a mf:Manifest ; + rdfs:comment "Serd terse writing test suite" ; + mf:entries ( + <#blank_object> + <#blank_subject> + <#collection_object> + <#collection_subject> + <#nil_object> + <#nil_subject> + ) . + +<#blank_object> + a rdft:TestTurtleEval ; + mf:action <blank_object.ttl> ; + mf:name "blank_object" ; + mf:result <blank_object.ttl> . + +<#blank_subject> + a rdft:TestTurtleEval ; + mf:action <blank_subject.ttl> ; + mf:name "blank_subject" ; + mf:result <blank_subject.ttl> . + +<#collection_object> + a rdft:TestTurtleEval ; + mf:action <collection_object.ttl> ; + mf:name "collection_object" ; + mf:result <collection_object.ttl> . + +<#collection_subject> + a rdft:TestTurtleEval ; + mf:action <collection_subject.ttl> ; + mf:name "collection_subject" ; + mf:result <collection_subject.ttl> . diff --git a/test/extra/terse/nil_object.ttl b/test/extra/terse/nil_object.ttl new file mode 100644 index 00000000..4a0f93bd --- /dev/null +++ b/test/extra/terse/nil_object.ttl @@ -0,0 +1,2 @@ +@prefix eg: <http://example.org/> . +eg:s eg:p1 () ; eg:p2 [] , () . diff --git a/test/extra/terse/nil_subject.ttl b/test/extra/terse/nil_subject.ttl new file mode 100644 index 00000000..a08f7d82 --- /dev/null +++ b/test/extra/terse/nil_subject.ttl @@ -0,0 +1,2 @@ +@prefix eg: <http://example.org/> . +() eg:p1 1 ; eg:p2 2 . diff --git a/test/meson.build b/test/meson.build index 937b9038..2ed73e63 100644 --- a/test/meson.build +++ b/test/meson.build @@ -34,6 +34,7 @@ ttl_metadata_file_paths = [ 'extra/pretty/manifest.ttl', 'extra/qualify/manifest.ttl', 'extra/root/manifest.ttl', + 'extra/terse/manifest.ttl', ] plot_scripts = files(plot_script_paths) @@ -129,6 +130,7 @@ unit_tests = [ 'statement', 'string', 'syntax', + 'terse_write', 'uri', 'world', 'writer', @@ -407,6 +409,12 @@ test_suites = { '--', ['-r', 'http://example.org/top/root/'], ], + 'terse': [ + files('extra/terse/manifest.ttl'), + ns_serdtest + 'terse/', + '--', + '-t', + ], } # Run every test suite with serdi diff --git a/test/serd_test_util/__init__.py b/test/serd_test_util/__init__.py index 8027462b..04876f98 100644 --- a/test/serd_test_util/__init__.py +++ b/test/serd_test_util/__init__.py @@ -60,7 +60,7 @@ def print_result_summary(results): failed, total = (results.n_failures, results.n_tests) if failed == 0: - sys.stdout.write("All {} tests passed\n".format(total)) + print("All {} tests passed".format(total)) else: error("{}/{} tests failed".format(failed, total)) diff --git a/test/test_terse_write.c b/test/test_terse_write.c new file mode 100644 index 00000000..277f025e --- /dev/null +++ b/test/test_terse_write.c @@ -0,0 +1,112 @@ +// Copyright 2019-2020 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#undef NDEBUG + +#include "serd/buffer.h" +#include "serd/env.h" +#include "serd/node.h" +#include "serd/sink.h" +#include "serd/statement.h" +#include "serd/stream.h" +#include "serd/string_view.h" +#include "serd/syntax.h" +#include "serd/world.h" +#include "serd/writer.h" + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + +static void +check_output(SerdWriter* writer, SerdBuffer* buffer, const char* expected) +{ + serd_writer_finish(writer); + serd_buffer_sink_finish(buffer); + + const char* output = (const char*)buffer->buf; + + assert(!strcmp(output, expected)); + + buffer->len = 0; +} + +static int +test(void) +{ + SerdBuffer buffer = {NULL, 0}; + SerdWorld* world = serd_world_new(); + SerdEnv* env = serd_env_new(serd_empty_string()); + + SerdNode* b1 = serd_new_blank(serd_string("b1")); + SerdNode* l1 = serd_new_blank(serd_string("l1")); + SerdNode* l2 = serd_new_blank(serd_string("l2")); + SerdNode* s1 = serd_new_string(serd_string("s1")); + SerdNode* s2 = serd_new_string(serd_string("s2")); + + SerdNode* rdf_first = serd_new_uri(serd_string(NS_RDF "first")); + SerdNode* rdf_value = serd_new_uri(serd_string(NS_RDF "value")); + SerdNode* rdf_rest = serd_new_uri(serd_string(NS_RDF "rest")); + SerdNode* rdf_nil = serd_new_uri(serd_string(NS_RDF "nil")); + + serd_env_set_prefix(env, serd_string("rdf"), serd_string(NS_RDF)); + + SerdWriter* writer = serd_writer_new( + world, SERD_TURTLE, 0, env, (SerdWriteFunc)serd_buffer_sink, &buffer); + + const SerdSink* sink = serd_writer_sink(writer); + + // Simple lone list + serd_sink_write(sink, SERD_TERSE_S | SERD_LIST_S, l1, rdf_first, s1, NULL); + serd_sink_write(sink, 0, l1, rdf_rest, l2, NULL); + serd_sink_write(sink, 0, l2, rdf_first, s2, NULL); + serd_sink_write(sink, 0, l2, rdf_rest, rdf_nil, NULL); + check_output(writer, &buffer, "( \"s1\" \"s2\" ) .\n"); + + // Nested terse lists + serd_sink_write(sink, + SERD_TERSE_S | SERD_LIST_S | SERD_TERSE_O | SERD_LIST_O, + l1, + rdf_first, + l2, + NULL); + serd_sink_write(sink, 0, l2, rdf_first, s1, NULL); + serd_sink_write(sink, 0, l1, rdf_rest, rdf_nil, NULL); + serd_sink_write(sink, 0, l2, rdf_rest, rdf_nil, NULL); + check_output(writer, &buffer, "( ( \"s1\" ) ) .\n"); + + // List as object + serd_sink_write( + sink, SERD_EMPTY_S | SERD_LIST_O | SERD_TERSE_O, b1, rdf_value, l1, NULL); + serd_sink_write(sink, 0, l1, rdf_first, s1, NULL); + serd_sink_write(sink, 0, l1, rdf_rest, l2, NULL); + serd_sink_write(sink, 0, l2, rdf_first, s2, NULL); + serd_sink_write(sink, 0, l2, rdf_rest, rdf_nil, NULL); + check_output(writer, &buffer, "[] rdf:value ( \"s1\" \"s2\" ) .\n"); + + serd_buffer_sink_finish(&buffer); + serd_writer_free(writer); + serd_node_free(rdf_nil); + serd_node_free(rdf_rest); + serd_node_free(rdf_value); + serd_node_free(rdf_first); + serd_node_free(s2); + serd_node_free(s1); + serd_node_free(l2); + serd_node_free(l1); + serd_node_free(b1); + serd_env_free(env); + serd_world_free(world); + free(buffer.buf); + + return 0; +} + +int +main(void) +{ + return test(); +} |