From 02507b57fae1e29572a11be8894b7dde9048da5d Mon Sep 17 00:00:00 2001 From: David Robillard Date: Wed, 13 Jan 2021 20:22:49 +0100 Subject: Add support for writing terse collections --- NEWS | 1 + include/serd/serd.h | 4 +- src/writer.c | 35 +++++++---- test/meson.build | 5 ++ test/run_test_suite.py | 12 +++- test/terse/blank_object.ttl | 2 + test/terse/blank_subject.ttl | 2 + test/terse/collection_object.ttl | 2 + test/terse/collection_subject.ttl | 2 + test/terse/manifest.ttl | 38 ++++++++++++ test/test_terse_write.c | 123 ++++++++++++++++++++++++++++++++++++++ 11 files changed, 211 insertions(+), 15 deletions(-) create mode 100644 test/terse/blank_object.ttl create mode 100644 test/terse/blank_subject.ttl create mode 100644 test/terse/collection_object.ttl create mode 100644 test/terse/collection_subject.ttl create mode 100644 test/terse/manifest.ttl create mode 100644 test/test_terse_write.c diff --git a/NEWS b/NEWS index 6768abbf..4b40e14b 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,7 @@ serd (1.0.1) unstable; * Add SerdBuffer for mutable buffers to keep SerdChunk const-correct * Add SerdWorld for shared library state * Add option for writing terse output without newlines + * Add support for writing terse collections * Add support for xsd:float and xsd:double literals * Bring read/write interface closer to C standard * Make nodes opaque diff --git a/include/serd/serd.h b/include/serd/serd.h index 77cf2478..91994912 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -137,7 +137,9 @@ typedef enum { SERD_ANON_S = 1u << 1u, ///< Start of anonymous subject SERD_ANON_O = 1u << 2u, ///< Start of anonymous object SERD_LIST_S = 1u << 3u, ///< Start of list subject - SERD_LIST_O = 1u << 4u ///< Start of list object + SERD_LIST_O = 1u << 4u, ///< Start of list object + SERD_TERSE_S = 1u << 5u, ///< Terse serialisation of new subject + SERD_TERSE_O = 1u << 6u ///< Terse serialisation of new object } SerdStatementFlag; /// Bitwise OR of SerdStatementFlag values diff 
--git a/src/writer.c b/src/writer.c index 1e453ddd..bf869967 100644 --- a/src/writer.c +++ b/src/writer.c @@ -61,8 +61,11 @@ typedef enum { SEP_ANON_S_P, ///< Between start of anonymous node and predicate SEP_ANON_END, ///< End of anonymous node (']') SEP_LIST_BEGIN, ///< Start of list ('(') - SEP_LIST_SEP, ///< List separator (whitespace) + SEP_LIST_SEP, ///< List separator (newline) SEP_LIST_END, ///< End of list (')') + SEP_TLIST_BEGIN, ///< Start of terse list ('(') + SEP_TLIST_SEP, ///< Terse list separator (space) + SEP_TLIST_END, ///< End of terse list (')') SEP_GRAPH_BEGIN, ///< Start of graph ('{') SEP_GRAPH_END, ///< End of graph ('}') } Sep; @@ -88,12 +91,15 @@ static const SepRule rules[] = { {",", 1, +0, SEP_ALL, SEP_NONE, ~(M(SEP_ANON_END) | M(SEP_LIST_END))}, {"", 0, +1, SEP_NONE, SEP_NONE, SEP_ALL}, {" ", 1, +0, SEP_NONE, SEP_NONE, SEP_NONE}, - {"[", 1, +1, M(SEP_END_O), SEP_NONE, SEP_NONE}, + {"[", 1, +1, M(SEP_END_O), M(SEP_TLIST_BEGIN) | M(SEP_TLIST_SEP), SEP_NONE}, {"", 0, +0, SEP_NONE, SEP_ALL, SEP_NONE}, {"]", 1, -1, SEP_NONE, ~M(SEP_ANON_BEGIN), SEP_NONE}, {"(", 1, +1, M(SEP_END_O), SEP_NONE, SEP_ALL}, {"", 0, +0, SEP_NONE, SEP_ALL, SEP_NONE}, {")", 1, -1, SEP_NONE, SEP_ALL, SEP_NONE}, + {"(", 1, +1, SEP_NONE, SEP_NONE, SEP_NONE}, + {"", 0, +0, SEP_ALL, SEP_NONE, SEP_NONE}, + {")", 1, -1, SEP_NONE, SEP_NONE, SEP_NONE}, {"{", 1, +1, SEP_ALL, SEP_NONE, SEP_NONE}, {"}", 1, -1, SEP_NONE, SEP_NONE, SEP_ALL}, {"<", 1, +0, SEP_NONE, SEP_NONE, SEP_NONE}, @@ -491,9 +497,9 @@ uri_sink(const void* buf, size_t size, size_t nmemb, void* stream) } static void -write_newline(SerdWriter* writer) +write_newline(SerdWriter* writer, bool terse) { - if (writer->flags & SERD_WRITE_TERSE) { + if (terse || (writer->flags & SERD_WRITE_TERSE)) { sink(" ", 1, writer); } else { sink("\n", 1, writer); @@ -507,16 +513,19 @@ static void write_top_level_sep(SerdWriter* writer) { if (!writer->empty && !(writer->flags & SERD_WRITE_TERSE)) { - write_newline(writer); + 
write_newline(writer, false); } } static bool write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep) { - (void)flags; - - const SepRule* rule = &rules[sep]; + const SepRule* rule = &rules[sep]; + const bool terse = (((flags & SERD_TERSE_S) && (flags & SERD_LIST_S)) || + ((flags & SERD_TERSE_O) && (flags & SERD_LIST_O))); + if (terse && sep >= SEP_LIST_BEGIN && sep <= SEP_LIST_END) { + sep = (Sep)((int)sep + 3); // Switch to corresponding terse separator + } // Adjust indent, but tolerate if it would become negative if ((rule->pre_line_after & (1u << writer->last_sep) || @@ -528,7 +537,7 @@ write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep) // Write newline or space before separator if necessary if (rule->pre_line_after & (1u << writer->last_sep)) { - write_newline(writer); + write_newline(writer, terse); } else if (rule->pre_space_after & (1u << writer->last_sep)) { sink(" ", 1, writer); } @@ -540,7 +549,7 @@ write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep) // Write newline after separator if necessary if (rule->post_line_after & (1u << writer->last_sep)) { - write_newline(writer); + write_newline(writer, terse); writer->last_sep = SEP_NONE; } else { writer->last_sep = sep; @@ -872,8 +881,10 @@ serd_writer_write_statement(SerdWriter* writer, const SerdNode* const graph = serd_statement_graph(statement); if (!is_resource(subject) || !is_resource(predicate) || !object || - ((flags & SERD_ANON_S) && (flags & SERD_LIST_S)) || - ((flags & SERD_ANON_O) && (flags & SERD_LIST_O))) { + ((flags & SERD_ANON_S) && (flags & SERD_LIST_S)) || // Nonsense + ((flags & SERD_ANON_O) && (flags & SERD_LIST_O)) || // Nonsense + ((flags & SERD_ANON_S) && (flags & SERD_TERSE_S)) || // Unsupported + ((flags & SERD_ANON_O) && (flags & SERD_TERSE_O))) { // Unsupported return SERD_ERR_BAD_ARG; } diff --git a/test/meson.build b/test/meson.build index 040b04fa..3926888d 100644 --- a/test/meson.build +++ b/test/meson.build @@ -12,6 +12,7 
@@ unit_tests = [ 'read_chunk', 'reader_writer', 'string', + 'terse_write', 'uri', ] @@ -139,6 +140,10 @@ if get_option('utils') timeout: 240) endforeach + test('terse', run_test_suite, + args: script_args + ['--osyntax', 'turtle', manifest, base_uri, '--', '-t'], + suite: ['rdf', 'serd'], + timeout: 240) ### Run the lax suite with lax parsing enabled as well manifest = files('lax/manifest.ttl') diff --git a/test/run_test_suite.py b/test/run_test_suite.py index 2d52e576..6a595e98 100755 --- a/test/run_test_suite.py +++ b/test/run_test_suite.py @@ -211,6 +211,7 @@ def test_suite( base_uri, report_filename, isyntax, + output_syntax, command_prefix, ): """Run all tests in a test suite manifest.""" @@ -235,8 +236,11 @@ def test_suite( def run_tests(test_class, tests, expected_return, results): thru_flags = [["-e"], ["-f"], ["-b"], ["-r", "http://example.org/"]] - osyntax = _test_output_syntax(test_class) thru_options_iter = _option_combinations(thru_flags) + if output_syntax is not None: + osyntax = output_syntax + else: + osyntax = _test_output_syntax(test_class) for test in sorted(tests): test_uri = model[test][mf + "action"][0] @@ -244,7 +248,9 @@ def test_suite( test_name = os.path.basename(test_uri_path) test_path = os.path.join(test_dir, test_name) - command = command_prefix + ["-a"] + [test_path, test_uri] + command = ( + command_prefix + ["-a", "-o", osyntax] + [test_path, test_uri] + ) command_string = " ".join(shlex.quote(c) for c in command) out_filename = os.path.join(out_test_dir, test_name + ".out") @@ -360,6 +366,7 @@ def main(): parser.add_argument("--report", help="path to write result report to") parser.add_argument("--serdi", default="serdi", help="path to serdi") parser.add_argument("--syntax", default="turtle", help="input syntax") + parser.add_argument("--osyntax", default=None, help="output syntax") parser.add_argument("--wrapper", default="", help="executable wrapper") parser.add_argument("manifest", help="test suite manifest.ttl file") 
parser.add_argument("base_uri", help="base URI for tests") @@ -377,6 +384,7 @@ def main(): args.base_uri, args.report, args.syntax, + args.osyntax, command_prefix, ) diff --git a/test/terse/blank_object.ttl b/test/terse/blank_object.ttl new file mode 100644 index 00000000..270e406b --- /dev/null +++ b/test/terse/blank_object.ttl @@ -0,0 +1,2 @@ +@prefix eg: . +eg:s eg:p1 [ eg:p11 1 ; eg:p12 2 ] ; eg:p2 [ eg:p23 3 ; eg:p24 4 ] . diff --git a/test/terse/blank_subject.ttl b/test/terse/blank_subject.ttl new file mode 100644 index 00000000..5e3303f9 --- /dev/null +++ b/test/terse/blank_subject.ttl @@ -0,0 +1,2 @@ +@prefix eg: . +[] eg:p1 1 ; eg:p2 2 . diff --git a/test/terse/collection_object.ttl b/test/terse/collection_object.ttl new file mode 100644 index 00000000..3310c418 --- /dev/null +++ b/test/terse/collection_object.ttl @@ -0,0 +1,2 @@ +@prefix eg: . +eg:s eg:p1 ( 1 2 ) ; eg:p2 ( 3 4 ) . diff --git a/test/terse/collection_subject.ttl b/test/terse/collection_subject.ttl new file mode 100644 index 00000000..43670620 --- /dev/null +++ b/test/terse/collection_subject.ttl @@ -0,0 +1,2 @@ +@prefix eg: . +( 1 2 ) eg:p3 3 ; eg:p4 4 . diff --git a/test/terse/manifest.ttl b/test/terse/manifest.ttl new file mode 100644 index 00000000..f8dca816 --- /dev/null +++ b/test/terse/manifest.ttl @@ -0,0 +1,38 @@ +@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix rdft: <http://www.w3.org/ns/rdftest#> . + +<> + rdf:type mf:Manifest ; + rdfs:comment "Serd terse serialisation test cases" ; + mf:entries ( + <#blank_object> + <#blank_subject> + <#collection_object> + <#collection_subject> + ) . + +<#blank_object> + rdf:type rdft:TestTurtleEval ; + mf:name "blank_object" ; + mf:action <blank_object.ttl> ; + mf:result <blank_object.ttl> . + +<#blank_subject> + rdf:type rdft:TestTurtleEval ; + mf:name "blank_subject" ; + mf:action <blank_subject.ttl> ; + mf:result <blank_subject.ttl> . + +<#collection_object> + rdf:type rdft:TestTurtleEval ; + mf:name "collection_object" ; + mf:action <collection_object.ttl> ; + mf:result <collection_object.ttl> . 
+ +<#collection_subject> + rdf:type rdft:TestTurtleEval ; + mf:name "collection_subject" ; + mf:action <collection_subject.ttl> ; + mf:result <collection_subject.ttl> . diff --git a/test/test_terse_write.c b/test/test_terse_write.c new file mode 100644 index 00000000..d6f24357 --- /dev/null +++ b/test/test_terse_write.c @@ -0,0 +1,123 @@ +/* + Copyright 2019-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ + +#undef NDEBUG + +#include "serd/serd.h" + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + +static void +check_output(SerdWriter* writer, SerdBuffer* buffer, const char* expected) +{ + serd_writer_finish(writer); + serd_buffer_sink_finish(buffer); + + const char* output = (const char*)buffer->buf; + const int valid = !strcmp(output, expected); + if (valid) { + fprintf(stderr, "%s", output); + } else { + fprintf(stderr, "error: Invalid output:\n%s", output); + fprintf(stderr, "note: Expected output:\n%s", expected); + } + assert(valid); + buffer->len = 0; +} + +static int +test(void) +{ + SerdBuffer buffer = {NULL, 0}; + SerdWorld* world = serd_world_new(); + SerdEnv* env = serd_env_new(SERD_EMPTY_STRING()); + SerdNodes* nodes = serd_nodes_new(); + + const SerdNode* b1 = + serd_nodes_manage(nodes, serd_new_blank(SERD_STATIC_STRING("b1"))); + const SerdNode* l1 = + serd_nodes_manage(nodes, serd_new_blank(SERD_STATIC_STRING("l1"))); + const SerdNode* l2 = + serd_nodes_manage(nodes, serd_new_blank(SERD_STATIC_STRING("l2"))); + const SerdNode* s1 = + serd_nodes_manage(nodes, serd_new_string(SERD_STATIC_STRING("s1"))); + const SerdNode* s2 = + serd_nodes_manage(nodes, serd_new_string(SERD_STATIC_STRING("s2"))); + const SerdNode* rdf_first = + serd_nodes_manage(nodes, serd_new_uri(SERD_STATIC_STRING(NS_RDF "first"))); + const SerdNode* rdf_rest = + serd_nodes_manage(nodes, serd_new_uri(SERD_STATIC_STRING(NS_RDF "rest"))); + const SerdNode* rdf_nil = + serd_nodes_manage(nodes, serd_new_uri(SERD_STATIC_STRING(NS_RDF "nil"))); + const SerdNode* rdf_value = + serd_nodes_manage(nodes, serd_new_uri(SERD_STATIC_STRING(NS_RDF "value"))); + + serd_env_set_prefix( + env, SERD_STATIC_STRING("rdf"), SERD_STATIC_STRING(NS_RDF)); + + SerdWriter* writer = serd_writer_new( + world, SERD_TURTLE, 0, env, (SerdWriteFunc)serd_buffer_sink, &buffer); + + const SerdSink* sink = serd_writer_sink(writer); + + // Simple lone list + 
serd_sink_write(sink, SERD_TERSE_S | SERD_LIST_S, l1, rdf_first, s1, NULL); + serd_sink_write(sink, 0, l1, rdf_rest, l2, NULL); + serd_sink_write(sink, 0, l2, rdf_first, s2, NULL); + serd_sink_write(sink, 0, l2, rdf_rest, rdf_nil, NULL); + check_output(writer, &buffer, "( \"s1\" \"s2\" ) .\n"); + + // Nested terse lists + serd_sink_write(sink, + SERD_TERSE_S | SERD_LIST_S | SERD_TERSE_O | SERD_LIST_O, + l1, + rdf_first, + l2, + NULL); + serd_sink_write(sink, 0, l2, rdf_first, s1, NULL); + serd_sink_write(sink, 0, l1, rdf_rest, rdf_nil, NULL); + serd_sink_write(sink, 0, l2, rdf_rest, rdf_nil, NULL); + check_output(writer, &buffer, "( ( \"s1\" ) ) .\n"); + + // List as object + serd_sink_write( + sink, SERD_EMPTY_S | SERD_LIST_O | SERD_TERSE_O, b1, rdf_value, l1, NULL); + serd_sink_write(sink, 0, l1, rdf_first, s1, NULL); + serd_sink_write(sink, 0, l1, rdf_rest, l2, NULL); + serd_sink_write(sink, 0, l2, rdf_first, s2, NULL); + serd_sink_write(sink, 0, l2, rdf_rest, rdf_nil, NULL); + check_output(writer, &buffer, "[]\n\trdf:value ( \"s1\" \"s2\" ) .\n"); + + serd_buffer_sink_finish(&buffer); + serd_writer_free(writer); + serd_nodes_free(nodes); + serd_env_free(env); + serd_world_free(world); + free(buffer.buf); + + return 0; +} + +int +main(void) +{ + return test(); +} -- cgit v1.2.1