From b717579988e675a5e4e5e826ff33097112a87934 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sun, 10 Mar 2019 22:50:31 +0100 Subject: Add support for writing terse collections --- NEWS | 1 + serd/serd.h | 4 +- src/writer.c | 35 +++++++---- tests/terse/blank_object.ttl | 2 + tests/terse/blank_subject.ttl | 2 + tests/terse/collection_object.ttl | 2 + tests/terse/collection_subject.ttl | 2 + tests/terse/manifest.ttl | 38 ++++++++++++ tests/terse_write_test.c | 117 +++++++++++++++++++++++++++++++++++++ wscript | 23 ++++++-- 10 files changed, 209 insertions(+), 17 deletions(-) create mode 100644 tests/terse/blank_object.ttl create mode 100644 tests/terse/blank_subject.ttl create mode 100644 tests/terse/collection_object.ttl create mode 100644 tests/terse/collection_subject.ttl create mode 100644 tests/terse/manifest.ttl create mode 100644 tests/terse_write_test.c diff --git a/NEWS b/NEWS index 5bcd4ee2..75f2dd3c 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,7 @@ serd (1.0.1) unstable; * Add model for storing statements in memory * Add option for writing terse output without newlines * Add support for validation + * Add support for writing terse collections * Bring read/write interface closer to C standard * Make nodes opaque * Make serd_strtod API const-correct diff --git a/serd/serd.h b/serd/serd.h index 67ecf1db..a5b28676 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -127,7 +127,9 @@ typedef enum { SERD_ANON_S = 1 << 1, ///< Start of anonymous subject SERD_ANON_O = 1 << 2, ///< Start of anonymous object SERD_LIST_S = 1 << 3, ///< Start of list subject - SERD_LIST_O = 1 << 4 ///< Start of list object + SERD_LIST_O = 1 << 4, ///< Start of list object + SERD_TERSE_S = 1 << 5, ///< Terse serialisation of new subject + SERD_TERSE_O = 1 << 6 ///< Terse serialisation of new object } SerdStatementFlag; /// Bitwise OR of SerdStatementFlag values diff --git a/src/writer.c b/src/writer.c index 2e2c2618..cd251fa2 100644 --- a/src/writer.c +++ b/src/writer.c @@ -66,8 +66,11 @@ 
typedef enum { SEP_ANON_S_P, ///< Between start of anonymous node and predicate SEP_ANON_END, ///< End of anonymous node (']') SEP_LIST_BEGIN, ///< Start of list ('(') - SEP_LIST_SEP, ///< List separator (whitespace) + SEP_LIST_SEP, ///< List separator (newline) SEP_LIST_END, ///< End of list (')') + SEP_TLIST_BEGIN, ///< Start of terse list ('(') + SEP_TLIST_SEP, ///< Terse list separator (space) + SEP_TLIST_END, ///< End of terse list (')') SEP_GRAPH_BEGIN, ///< Start of graph ('{') SEP_GRAPH_END, ///< End of graph ('}') } Sep; @@ -93,12 +96,15 @@ static const SepRule rules[] = { {",", 1, +0, SEP_ALL, SEP_NONE, ~(M(SEP_ANON_END) | M(SEP_LIST_END))}, {"", 0, +1, SEP_NONE, SEP_NONE, SEP_ALL}, {" ", 1, +0, SEP_NONE, SEP_NONE, SEP_NONE}, - {"[", 1, +1, M(SEP_END_O), SEP_NONE, SEP_NONE}, + {"[", 1, +1, M(SEP_END_O), M(SEP_TLIST_BEGIN)|M(SEP_TLIST_SEP), SEP_NONE}, {"", 0, +0, SEP_NONE, SEP_ALL, SEP_NONE}, {"]", 1, -1, SEP_NONE, ~M(SEP_ANON_BEGIN), SEP_NONE}, {"(", 1, +1, M(SEP_END_O), SEP_NONE, SEP_ALL}, {"", 0, +0, SEP_NONE, SEP_ALL, SEP_NONE}, {")", 1, -1, SEP_NONE, SEP_ALL, SEP_NONE}, + {"(", 1, +1, SEP_NONE, SEP_NONE, SEP_NONE}, + {"", 0, +0, SEP_ALL, SEP_NONE, SEP_NONE}, + {")", 1, -1, SEP_NONE, SEP_NONE, SEP_NONE}, {"{", 1, +1, SEP_ALL, SEP_NONE, SEP_NONE}, {"}", 1, -1, SEP_NONE, SEP_NONE, SEP_ALL}, {"<", 1, +0, SEP_NONE, SEP_NONE, SEP_NONE}, @@ -425,9 +431,9 @@ uri_sink(const void* buf, size_t size, size_t nmemb, void* stream) } static void -write_newline(SerdWriter* writer) +write_newline(SerdWriter* writer, bool terse) { - if (writer->flags & SERD_WRITE_TERSE) { + if (terse || (writer->flags & SERD_WRITE_TERSE)) { sink(" ", 1, writer); } else { sink("\n", 1, writer); @@ -441,16 +447,19 @@ static void write_top_level_sep(SerdWriter* writer) { if (!writer->empty && !(writer->flags & SERD_WRITE_TERSE)) { - write_newline(writer); + write_newline(writer, false); } } static bool write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep) { - (void)flags; 
- - const SepRule* rule = &rules[sep]; + const SepRule* rule = &rules[sep]; + const bool terse = (((flags & SERD_TERSE_S) && (flags & SERD_LIST_S)) || + ((flags & SERD_TERSE_O) && (flags & SERD_LIST_O))); + if (terse && sep >= SEP_LIST_BEGIN && sep <= SEP_LIST_END) { + sep = (Sep)((int)sep + 3); // Switch to corresponding terse separator + } // Adjust indent, but tolerate if it would become negative writer->indent = @@ -460,7 +469,7 @@ write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep) // Write newline or space before separator if necessary if (rule->pre_line_after & (1u << writer->last_sep)) { - write_newline(writer); + write_newline(writer, terse); } else if (rule->pre_space_after & (1u << writer->last_sep)) { sink(" ", 1, writer); } @@ -470,7 +479,7 @@ write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep) // Write newline after separator if necessary if (rule->post_line_after & (1u << writer->last_sep)) { - write_newline(writer); + write_newline(writer, terse); writer->last_sep = SEP_NONE; } else { writer->last_sep = sep; @@ -773,8 +782,10 @@ serd_writer_write_statement(SerdWriter* writer, const SerdNode* const graph = serd_statement_get_graph(statement); if (!is_resource(subject) || !is_resource(predicate) || !object || - ((flags & SERD_ANON_S) && (flags & SERD_LIST_S)) || - ((flags & SERD_ANON_O) && (flags & SERD_LIST_O))) { + ((flags & SERD_ANON_S) && (flags & SERD_LIST_S)) || // Nonsense + ((flags & SERD_ANON_O) && (flags & SERD_LIST_O)) || // Nonsense + ((flags & SERD_ANON_S) && (flags & SERD_TERSE_S)) || // Unsupported + ((flags & SERD_ANON_O) && (flags & SERD_TERSE_O))) { // Unsupported return SERD_ERR_BAD_ARG; } diff --git a/tests/terse/blank_object.ttl b/tests/terse/blank_object.ttl new file mode 100644 index 00000000..270e406b --- /dev/null +++ b/tests/terse/blank_object.ttl @@ -0,0 +1,2 @@ +@prefix eg: . +eg:s eg:p1 [ eg:p11 1 ; eg:p12 2 ] ; eg:p2 [ eg:p23 3 ; eg:p24 4 ] . 
diff --git a/tests/terse/blank_subject.ttl b/tests/terse/blank_subject.ttl new file mode 100644 index 00000000..5e3303f9 --- /dev/null +++ b/tests/terse/blank_subject.ttl @@ -0,0 +1,2 @@ +@prefix eg: . +[] eg:p1 1 ; eg:p2 2 . diff --git a/tests/terse/collection_object.ttl b/tests/terse/collection_object.ttl new file mode 100644 index 00000000..3310c418 --- /dev/null +++ b/tests/terse/collection_object.ttl @@ -0,0 +1,2 @@ +@prefix eg: . +eg:s eg:p1 ( 1 2 ) ; eg:p2 ( 3 4 ) . diff --git a/tests/terse/collection_subject.ttl b/tests/terse/collection_subject.ttl new file mode 100644 index 00000000..43670620 --- /dev/null +++ b/tests/terse/collection_subject.ttl @@ -0,0 +1,2 @@ +@prefix eg: . +( 1 2 ) eg:p3 3 ; eg:p4 4 . diff --git a/tests/terse/manifest.ttl b/tests/terse/manifest.ttl new file mode 100644 index 00000000..f8dca816 --- /dev/null +++ b/tests/terse/manifest.ttl @@ -0,0 +1,38 @@ +@prefix mf: . +@prefix rdf: . +@prefix rdfs: . +@prefix rdft: . + +<> + rdf:type mf:Manifest ; + rdfs:comment "Serd terse serialisation test cases" ; + mf:entries ( + <#blank_object> + <#blank_subject> + <#collection_object> + <#collection_subject> + ) . + +<#blank_object> + rdf:type rdft:TestTurtleEval ; + mf:name "blank_object" ; + mf:action ; + mf:result . + +<#blank_subject> + rdf:type rdft:TestTurtleEval ; + mf:name "blank_subject" ; + mf:action ; + mf:result . + +<#collection_object> + rdf:type rdft:TestTurtleEval ; + mf:name "collection_object" ; + mf:action ; + mf:result . + +<#collection_subject> + rdf:type rdft:TestTurtleEval ; + mf:name "collection_subject" ; + mf:action ; + mf:result . 
diff --git a/tests/terse_write_test.c b/tests/terse_write_test.c new file mode 100644 index 00000000..4b6a80c4 --- /dev/null +++ b/tests/terse_write_test.c @@ -0,0 +1,117 @@ +/* + Copyright 2019-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#undef NDEBUG + +#include "serd/serd.h" + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + +static void +check_output(SerdWriter* writer, SerdBuffer* buffer, const char* expected) +{ + serd_writer_finish(writer); + serd_buffer_sink_finish(buffer); + + const char* output = (const char*)buffer->buf; + const int valid = !strcmp(output, expected); + if (valid) { + fprintf(stderr, "%s", output); + } else { + fprintf(stderr, "error: Invalid output:\n%s", output); + fprintf(stderr, "note: Expected output:\n%s", expected); + } + assert(valid); + buffer->len = 0; +} + +static int +test(void) +{ + SerdBuffer buffer = { NULL, 0 }; + SerdWorld* world = serd_world_new(); + SerdEnv* env = serd_env_new(NULL); + SerdNodes* nodes = serd_nodes_new(); + const SerdNode* b1 = serd_nodes_manage(nodes, serd_new_blank("b1")); + const SerdNode* l1 = serd_nodes_manage(nodes, serd_new_blank("l1")); + const SerdNode* l2 = serd_nodes_manage(nodes, serd_new_blank("l2")); + const SerdNode* s1 = 
serd_nodes_manage(nodes, serd_new_string("s1")); + const SerdNode* s2 = serd_nodes_manage(nodes, serd_new_string("s2")); + + const SerdNode* rdf_first = + serd_nodes_manage(nodes, serd_new_uri(NS_RDF "first")); + const SerdNode* rdf_rest = + serd_nodes_manage(nodes, serd_new_uri(NS_RDF "rest")); + const SerdNode* rdf_nil = + serd_nodes_manage(nodes, serd_new_uri(NS_RDF "nil")); + const SerdNode* rdf_value = + serd_nodes_manage(nodes, serd_new_uri(NS_RDF "value")); + + serd_env_set_prefix_from_strings(env, "rdf", NS_RDF); + + SerdWriter* writer = serd_writer_new(world, + SERD_TURTLE, + 0, + env, + (SerdWriteFunc)serd_buffer_sink, + &buffer); + + const SerdSink* sink = serd_writer_get_sink(writer); + + // Simple lone list + serd_sink_write(sink, SERD_TERSE_S | SERD_LIST_S, l1, rdf_first, s1, NULL); + serd_sink_write(sink, 0, l1, rdf_rest, l2, NULL); + serd_sink_write(sink, 0, l2, rdf_first, s2, NULL); + serd_sink_write(sink, 0, l2, rdf_rest, rdf_nil, NULL); + check_output(writer, &buffer, "( \"s1\" \"s2\" ) .\n"); + + // Nested terse lists + serd_sink_write(sink, SERD_TERSE_S|SERD_LIST_S|SERD_TERSE_O|SERD_LIST_O, + l1, rdf_first, l2, NULL); + serd_sink_write(sink, 0, l2, rdf_first, s1, NULL); + serd_sink_write(sink, 0, l1, rdf_rest, rdf_nil, NULL); + serd_sink_write(sink, 0, l2, rdf_rest, rdf_nil, NULL); + check_output(writer, &buffer, "( ( \"s1\" ) ) .\n"); + + // List as object + serd_sink_write(sink, SERD_EMPTY_S|SERD_LIST_O|SERD_TERSE_O, + b1, rdf_value, l1, NULL); + serd_sink_write(sink, 0, l1, rdf_first, s1, NULL); + serd_sink_write(sink, 0, l1, rdf_rest, l2, NULL); + serd_sink_write(sink, 0, l2, rdf_first, s2, NULL); + serd_sink_write(sink, 0, l2, rdf_rest, rdf_nil, NULL); + check_output(writer, &buffer, "[]\n\trdf:value ( \"s1\" \"s2\" ) .\n"); + + serd_buffer_sink_finish(&buffer); + serd_writer_free(writer); + serd_nodes_free(nodes); + serd_env_free(env); + serd_world_free(world); + free(buffer.buf); + + return 0; +} + +int +main(void) +{ + return test(); +} 
diff --git a/wscript b/wscript index 979ff727..0eb30597 100644 --- a/wscript +++ b/wscript @@ -208,6 +208,7 @@ def build(bld): ('sink_test', 'tests/sink_test.c'), ('serd_test', 'tests/serd_test.c'), ('read_chunk_test', 'tests/read_chunk_test.c'), + ('terse_write_test', 'tests/terse_write_test.c'), ('nodes_test', 'tests/nodes_test.c'), ('overflow_test', 'tests/overflow_test.c'), ('model_test', 'tests/model_test.c')]: @@ -489,7 +490,13 @@ def _option_combinations(options): return itertools.cycle(combinations) -def test_suite(ctx, base_uri, testdir, report, isyntax, options=[]): +def test_suite(ctx, + base_uri, + testdir, + report, + isyntax, + options=[], + output_syntax=None): srcdir = ctx.path.abspath() mf = 'http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#' @@ -502,7 +509,9 @@ def test_suite(ctx, base_uri, testdir, report, isyntax, options=[]): def run_tests(test_class, tests, expected_return): thru_flags = [['-e'], ['-b'], ['-r', 'http://example.org/']] - osyntax = _test_output_syntax(test_class) + osyntax = output_syntax or _test_output_syntax(test_class) + extra_options_iter = _option_combinations([] if output_syntax + else [['-f']]) thru_options_iter = _option_combinations(thru_flags) tests_name = '%s.%s' % (testdir, test_class[test_class.find('#') + 1:]) with ctx.group(tests_name) as check: @@ -512,7 +521,10 @@ def test_suite(ctx, base_uri, testdir, report, isyntax, options=[]): action = os.path.join('tests', testdir, basename) rel_action = os.path.join(os.path.relpath(srcdir), action) uri = base_uri + os.path.basename(action) - command = [serdi, '-a'] + options + [rel_action, uri] + command = ([serdi, '-a', '-o', osyntax] + + options + + flatten_options(next(extra_options_iter)) + + [rel_action, uri]) # Run strict test if expected_return == 0: @@ -566,7 +578,7 @@ def test(tst): import tempfile # Create test output directories - for i in ['bad', 'good', 'lax', + for i in ['bad', 'good', 'lax', 'terse', 'TurtleTests', 'NTriplesTests', 'NQuadsTests', 
'TriGTests']: try: test_dir = os.path.join('tests', i) @@ -587,6 +599,7 @@ def test(tst): check(['./nodes_test']) check(['./overflow_test']) check(['./serd_test']) + check(['./terse_write_test']) check(['./read_chunk_test']) def test_syntax_io(check, in_name, check_name, lang): @@ -668,6 +681,8 @@ def test(tst): test_suite(tst, serd_base + 'bad/', 'bad', None, 'Turtle') test_suite(tst, serd_base + 'lax/', 'lax', None, 'Turtle', ['-l']) test_suite(tst, serd_base + 'lax/', 'lax', None, 'Turtle') + test_suite(tst, serd_base + 'terse/', 'terse', None, 'Turtle', ['-t'], + output_syntax='Turtle') # Standard test suites with open('earl.ttl', 'w') as report: -- cgit v1.2.1