From 7d1b98ad1b2f5202126d78eaeb471263444a1384 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sun, 15 Nov 2020 12:31:41 +0100 Subject: Move benchmark script to scripts subdirectory --- scripts/serd_bench.py | 213 ++++++++++++++++++++++++++++++++++++++++++++++++++ serd_bench.py | 213 -------------------------------------------------- 2 files changed, 213 insertions(+), 213 deletions(-) create mode 100755 scripts/serd_bench.py delete mode 100755 serd_bench.py diff --git a/scripts/serd_bench.py b/scripts/serd_bench.py new file mode 100755 index 00000000..b34545b0 --- /dev/null +++ b/scripts/serd_bench.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python + +import csv +import itertools +import math +import matplotlib +import optparse +import os +import subprocess +import sys + + +class WorkingDirectory: + "Scoped context for changing working directory" + def __init__(self, working_dir): + self.original_dir = os.getcwd() + self.working_dir = working_dir + + def __enter__(self): + sys.stderr.write("Entering directory `%s'\n" % self.working_dir) + os.chdir(self.working_dir) + return self + + def __exit__(self, type, value, traceback): + sys.stderr.write("Leaving directory `%s'\n" % self.working_dir) + os.chdir(self.original_dir) + + +def filename(n): + "Filename for a generated file with n statements" + return 'gen%d.ttl' % n + + +def gen(sp2b_dir, n_min, n_max, step): + "Generate files with n_min ... n_max statements if they are not present" + with WorkingDirectory(sp2b_dir) as dir: + for n in range(n_min, n_max + step, step): + out_path = os.path.join(dir.original_dir, 'build', filename(n)) + if not os.path.exists(out_path): + subprocess.call(['./sp2b_gen', '-t', str(n), out_path]) + + +def write_header(results, progs): + "Write the header line for TSV output" + results.write('n') + for prog in progs: + results.write('\t' + os.path.basename(prog.split()[0])) + results.write('\n') + + +def parse_time(report): + "Return user time and max RSS from a /usr/bin/time -v report" + time = memory = None + for line in report.split('\n'): + if line.startswith('\tUser time'): + time = float(line[line.find(':') + 1:]) + elif line.startswith('\tMaximum resident set'): + memory = float(line[line.find(':') + 1:]) * 1024 + + return (time, memory) + + +def get_dashes(): + "Generator for plot line dash patterns" + dash = 2.0 + space = dot = 0.75 + + yield [] # Solid + yield [dash, space] # Dashed + yield [dot, space] # Dotted + + # Dash-dots, with increasing number of dots for each line + for i in itertools.count(2): + yield [dash, space] + [dot, space] * (i - 1) + + +def plot(in_file, out_filename, x_label, y_label, y_max=None): + "Plot a TSV file as SVG" + + matplotlib.use('agg') + import matplotlib.pyplot as plt + + fig_height = 4.0 + dashes = get_dashes() + markers = itertools.cycle(['o', 's', 'v', 'D', '*', 'p', 'P', 'h', 'X']) + + reader = csv.reader(in_file, delimiter='\t') + header = next(reader) + cols = [x for x in zip(*list(reader))] + + plt.clf() + fig = plt.figure(figsize=(fig_height * math.sqrt(2), fig_height)) + ax = fig.add_subplot(111) + + ax.set_xlabel(x_label) + ax.set_ylabel(y_label) + + if y_max is not None: + ax.set_ylim([0.0, y_max]) + + ax.grid(linewidth=0.25, linestyle=':', color='0', dashes=[0.2, 1.6]) + ax.ticklabel_format(style='sci', scilimits=(4, 0), useMathText=True) + ax.tick_params(axis='both', width=0.75) + + x = list(map(float, cols[0])) + for i, y in enumerate(cols[1::]): + ax.plot(x, + list(map(float, y)), + label=header[i + 1], + marker=next(markers), + dashes=next(dashes), + markersize=3.0, + linewidth=1.0) + + plt.legend() + plt.savefig(out_filename, bbox_inches='tight', pad_inches=0.025) + plt.close() + sys.stderr.write('wrote {}\n'.format(out_filename)) + + +def run(progs, n_min, n_max, step): + "Benchmark each program with n_min ... n_max statements" + with WorkingDirectory('build'): + results = {'time': open('serdi-time.txt', 'w'), + 'throughput': open('serdi-throughput.txt', 'w'), + 'memory': open('serdi-memory.txt', 'w')} + + # Write TSV header for all output files + for name, f in results.items(): + write_header(f, progs) + + for n in range(n_min, n_max + step, step): + # Add first column (n) to rows + rows = {} + for name, _ in results.items(): + rows[name] = [str(n)] + + # Run each program and fill rows with measurements + for prog in progs: + cmd = '/usr/bin/time -v ' + prog + ' ' + filename(n) + with open(filename(n) + '.out', 'w') as out: + sys.stderr.write(cmd + '\n') + proc = subprocess.Popen( + cmd.split(), stdout=out, stderr=subprocess.PIPE) + + time, memory = parse_time(proc.communicate()[1].decode()) + rows['time'] += ['%.07f' % time] + rows['throughput'] += ['%d' % (n / time)] + rows['memory'] += [str(memory)] + + # Write rows to output files + for name, f in results.items(): + f.write('\t'.join(rows[name]) + '\n') + + for name, f in results.items(): + tsv_filename = 'serdi-%s.txt' % name + sys.stderr.write('wrote %s\n' % tsv_filename) + + +def plot_results(): + "Plot all benchmark results" + with WorkingDirectory('build'): + plot(open('serdi-time.txt', 'r'), 'serdi-time.svg', + 'Statements', 'Time (s)') + plot(open('serdi-throughput.txt', 'r'), 'serdi-throughput.svg', + 'Statements', 'Statements / s') + plot(open('serdi-memory.txt', 'r'), 'serdi-memory.svg', + 'Statements', 'Bytes') + + +if __name__ == "__main__": + class OptParser(optparse.OptionParser): + def format_epilog(self, formatter): + return self.expand_prog_name(self.epilog) + + opt = OptParser( + usage='%prog [OPTION]... SP2B_DIR', + description='Benchmark RDF reading and writing commands\n', + epilog=''' +Example: + %prog --max 100000 \\ + --run 'rapper -i turtle -o turtle' \\ + --run 'riot --output=ttl' \\ + --run 'rdfpipe -i turtle -o turtle' /path/to/sp2b/src/ +''') + + opt.add_option('--max', type='int', default=1000000, + help='maximum triple count') + opt.add_option('--run', type='string', action='append', default=[], + help='additional command to run (input file is appended)') + opt.add_option('--no-generate', action='store_true', + help='do not generate data') + opt.add_option('--no-execute', action='store_true', + help='do not run benchmarks') + opt.add_option('--no-plot', action='store_true', + help='do not plot benchmarks') + + (options, args) = opt.parse_args() + if len(args) != 1: + opt.print_usage() + sys.exit(1) + + progs = ['serdi -b -f -i turtle -o turtle'] + options.run + min_n = int(options.max / 10) + max_n = options.max + step = min_n + + if not options.no_generate: + gen(str(args[0]), min_n, max_n, step) + if not options.no_execute: + run(progs, min_n, max_n, step) + if not options.no_plot: + plot_results() diff --git a/serd_bench.py b/serd_bench.py deleted file mode 100755 index b34545b0..00000000 --- a/serd_bench.py +++ /dev/null @@ -1,213 +0,0 @@ -#!/usr/bin/env python - -import csv -import itertools -import math -import matplotlib -import optparse -import os -import subprocess -import sys - - -class WorkingDirectory: - "Scoped context for changing working directory" - def __init__(self, working_dir): - self.original_dir = os.getcwd() - self.working_dir = working_dir - - def __enter__(self): - sys.stderr.write("Entering directory `%s'\n" % self.working_dir) - os.chdir(self.working_dir) - return self - - def __exit__(self, type, value, traceback): - sys.stderr.write("Leaving directory `%s'\n" % self.working_dir) - os.chdir(self.original_dir) - - -def filename(n): - "Filename for a generated file with n statements" - return 'gen%d.ttl' % n - - -def gen(sp2b_dir, n_min, n_max, step): - "Generate files with n_min ... n_max statements if they are not present" - with WorkingDirectory(sp2b_dir) as dir: - for n in range(n_min, n_max + step, step): - out_path = os.path.join(dir.original_dir, 'build', filename(n)) - if not os.path.exists(out_path): - subprocess.call(['./sp2b_gen', '-t', str(n), out_path]) - - -def write_header(results, progs): - "Write the header line for TSV output" - results.write('n') - for prog in progs: - results.write('\t' + os.path.basename(prog.split()[0])) - results.write('\n') - - -def parse_time(report): - "Return user time and max RSS from a /usr/bin/time -v report" - time = memory = None - for line in report.split('\n'): - if line.startswith('\tUser time'): - time = float(line[line.find(':') + 1:]) - elif line.startswith('\tMaximum resident set'): - memory = float(line[line.find(':') + 1:]) * 1024 - - return (time, memory) - - -def get_dashes(): - "Generator for plot line dash patterns" - dash = 2.0 - space = dot = 0.75 - - yield [] # Solid - yield [dash, space] # Dashed - yield [dot, space] # Dotted - - # Dash-dots, with increasing number of dots for each line - for i in itertools.count(2): - yield [dash, space] + [dot, space] * (i - 1) - - -def plot(in_file, out_filename, x_label, y_label, y_max=None): - "Plot a TSV file as SVG" - - matplotlib.use('agg') - import matplotlib.pyplot as plt - - fig_height = 4.0 - dashes = get_dashes() - markers = itertools.cycle(['o', 's', 'v', 'D', '*', 'p', 'P', 'h', 'X']) - - reader = csv.reader(in_file, delimiter='\t') - header = next(reader) - cols = [x for x in zip(*list(reader))] - - plt.clf() - fig = plt.figure(figsize=(fig_height * math.sqrt(2), fig_height)) - ax = fig.add_subplot(111) - - ax.set_xlabel(x_label) - ax.set_ylabel(y_label) - - if y_max is not None: - ax.set_ylim([0.0, y_max]) - - ax.grid(linewidth=0.25, linestyle=':', color='0', dashes=[0.2, 1.6]) - ax.ticklabel_format(style='sci', scilimits=(4, 0), useMathText=True) - ax.tick_params(axis='both', width=0.75) - - x = list(map(float, cols[0])) - for i, y in enumerate(cols[1::]): - ax.plot(x, - list(map(float, y)), - label=header[i + 1], - marker=next(markers), - dashes=next(dashes), - markersize=3.0, - linewidth=1.0) - - plt.legend() - plt.savefig(out_filename, bbox_inches='tight', pad_inches=0.025) - plt.close() - sys.stderr.write('wrote {}\n'.format(out_filename)) - - -def run(progs, n_min, n_max, step): - "Benchmark each program with n_min ... n_max statements" - with WorkingDirectory('build'): - results = {'time': open('serdi-time.txt', 'w'), - 'throughput': open('serdi-throughput.txt', 'w'), - 'memory': open('serdi-memory.txt', 'w')} - - # Write TSV header for all output files - for name, f in results.items(): - write_header(f, progs) - - for n in range(n_min, n_max + step, step): - # Add first column (n) to rows - rows = {} - for name, _ in results.items(): - rows[name] = [str(n)] - - # Run each program and fill rows with measurements - for prog in progs: - cmd = '/usr/bin/time -v ' + prog + ' ' + filename(n) - with open(filename(n) + '.out', 'w') as out: - sys.stderr.write(cmd + '\n') - proc = subprocess.Popen( - cmd.split(), stdout=out, stderr=subprocess.PIPE) - - time, memory = parse_time(proc.communicate()[1].decode()) - rows['time'] += ['%.07f' % time] - rows['throughput'] += ['%d' % (n / time)] - rows['memory'] += [str(memory)] - - # Write rows to output files - for name, f in results.items(): - f.write('\t'.join(rows[name]) + '\n') - - for name, f in results.items(): - tsv_filename = 'serdi-%s.txt' % name - sys.stderr.write('wrote %s\n' % tsv_filename) - - -def plot_results(): - "Plot all benchmark results" - with WorkingDirectory('build'): - plot(open('serdi-time.txt', 'r'), 'serdi-time.svg', - 'Statements', 'Time (s)') - plot(open('serdi-throughput.txt', 'r'), 'serdi-throughput.svg', - 'Statements', 'Statements / s') - plot(open('serdi-memory.txt', 'r'), 'serdi-memory.svg', - 'Statements', 'Bytes') - - -if __name__ == "__main__": - class OptParser(optparse.OptionParser): - def format_epilog(self, formatter): - return self.expand_prog_name(self.epilog) - - opt = OptParser( - usage='%prog [OPTION]... SP2B_DIR', - description='Benchmark RDF reading and writing commands\n', - epilog=''' -Example: - %prog --max 100000 \\ - --run 'rapper -i turtle -o turtle' \\ - --run 'riot --output=ttl' \\ - --run 'rdfpipe -i turtle -o turtle' /path/to/sp2b/src/ -''') - - opt.add_option('--max', type='int', default=1000000, - help='maximum triple count') - opt.add_option('--run', type='string', action='append', default=[], - help='additional command to run (input file is appended)') - opt.add_option('--no-generate', action='store_true', - help='do not generate data') - opt.add_option('--no-execute', action='store_true', - help='do not run benchmarks') - opt.add_option('--no-plot', action='store_true', - help='do not plot benchmarks') - - (options, args) = opt.parse_args() - if len(args) != 1: - opt.print_usage() - sys.exit(1) - - progs = ['serdi -b -f -i turtle -o turtle'] + options.run - min_n = int(options.max / 10) - max_n = options.max - step = min_n - - if not options.no_generate: - gen(str(args[0]), min_n, max_n, step) - if not options.no_execute: - run(progs, min_n, max_n, step) - if not options.no_plot: - plot_results() -- cgit v1.2.1