From b5956c4dc6b065d664908104d5fc6752a87e3364 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 31 Mar 2023 17:17:41 -0400 Subject: Add model and serd-sort utility With all the new functionality, the complexity of the serd-pipe command-line interface is starting to push the limits of available flags. So, instead of grafting on further options to control a model, this commit adds a new tool, serd-sort, which acts somewhat like a stripped-down serd-pipe that stores statements in a model in memory. This keeps the complexity (including the user-facing complexity) of any one tool down, since other more focused tools can be used for streaming tasks in a pipeline. In other words, abandon Swissarmyknifeism, take a page from the Unix philosophy, and try to expose the model functionality to the command line in a dedicated, focused tool. The model implementation is tested by using this tool to run a subset of the usual test suites, and a special suite to test statement sorting. --- scripts/serd_bench.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'scripts') diff --git a/scripts/serd_bench.py b/scripts/serd_bench.py index db8c1c5b..35869ce6 100755 --- a/scripts/serd_bench.py +++ b/scripts/serd_bench.py @@ -76,8 +76,8 @@ def gen(sp2b_dir, n_min, n_max, step): def write_header(results, progs): "Write the header line for TSV output" - results.write("n") - for prog in progs: + results.write("n\tserd-pipe\tserd-sort") + for prog in progs[2:]: results.write("\t" + os.path.basename(prog.split()[0])) results.write("\n") @@ -191,13 +191,18 @@ def run(progs, n_min, n_max, step): cmd = "/usr/bin/time -v " + prog + " " + filename(n) with open(filename(n) + ".out", "w") as out: sys.stderr.write(cmd + "\n") - proc = subprocess.Popen( - cmd.split(), stdout=out, stderr=subprocess.PIPE + proc = subprocess.run( + cmd.split(), + check=True, + stdout=out, + stderr=subprocess.PIPE, ) - time, memory = parse_time(proc.communicate()[1].decode()) + 
time, memory = parse_time(proc.stderr.decode()) rows["time"] += ["%.07f" % time] - rows["throughput"] += ["%d" % (n / time)] + rows["throughput"] += ( + ["%d" % (n / time)] if time > 0.0 else ["0"] + ) rows["memory"] += [str(memory)] # Write rows to output files @@ -272,7 +277,11 @@ example: args = ap.parse_args(sys.argv[1:]) serd_opts = "-I turtle -I verbatim -O turtle -O verbatim -O expanded" - progs = ["tools/serd-pipe " + serd_opts] + args.run + progs = [ + "tools/serd-pipe " + serd_opts, + "tools/serd-sort " + serd_opts, + ] + args.run + min_n = int(args.max / args.steps) max_n = args.max step = min_n -- cgit v1.2.1