about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x serd_bench.py 131
1 file changed, 110 insertions, 21 deletions
diff --git a/serd_bench.py b/serd_bench.py
index 719fff3f..4dda6e36 100755
--- a/serd_bench.py
+++ b/serd_bench.py
@@ -1,34 +1,44 @@
#!/usr/bin/env python
+import csv
+import itertools
+import matplotlib.pyplot as plt
import optparse
import os
import subprocess
import sys
+import math
+
class WorkingDirectory:
"Scoped context for changing working directory"
def __init__(self, working_dir):
self.original_dir = os.getcwd()
- self.working_dir = working_dir
+ self.working_dir = working_dir
def __enter__(self):
+ sys.stderr.write("Entering directory `%s'\n" % self.working_dir)
os.chdir(self.working_dir)
return self
def __exit__(self, type, value, traceback):
+ sys.stderr.write("Leaving directory `%s'\n" % self.working_dir)
os.chdir(self.original_dir)
+
def filename(n):
"Filename for a generated file with n statements"
return 'gen%d.ttl' % n
+
def gen(sp2b_dir, n_min, n_max, step):
"Generate files with n_min ... n_max statements if they are not present"
with WorkingDirectory(sp2b_dir) as dir:
for n in range(n_min, n_max + step, step):
out_path = os.path.join(dir.original_dir, 'build', filename(n))
if not os.path.exists(out_path):
- subprocess.call(['sp2b_gen', '-t', str(n), out_path])
+ subprocess.call(['./sp2b_gen', '-t', str(n), out_path])
+
def write_header(results, progs):
"Write the header line for TSV output"
@@ -37,6 +47,7 @@ def write_header(results, progs):
results.write('\t' + os.path.basename(prog.split()[0]))
results.write('\n')
+
def parse_time(report):
"Return user time and max RSS from a /usr/bin/time -v report"
time = memory = None
@@ -48,6 +59,61 @@ def parse_time(report):
return (time, memory)
+
+def get_dashes():
+ "Generator for plot line dash patterns"
+ dash = 2.0
+ space = dot = 0.75
+
+ yield [] # Solid
+ yield [dash, space] # Dashed
+ yield [dot, space] # Dotted
+
+ # Dash-dots, with increasing number of dots for each line
+ for i in itertools.count(2):
+ yield [dash, space] + [dot, space] * (i - 1)
+
+
+def plot(in_file, out_filename, x_label, y_label, y_max=None):
+ "Plot a TSV file as SVG"
+ fig_height = 4.0
+ dashes = get_dashes()
+ markers = itertools.cycle(['o', 's', 'v', 'D', '*', 'p', 'P', 'h', 'X'])
+
+ reader = csv.reader(in_file, delimiter='\t')
+ header = next(reader)
+ cols = [x for x in zip(*list(reader))]
+
+ plt.clf()
+ fig = plt.figure(figsize=(fig_height * math.sqrt(2), fig_height))
+ ax = fig.add_subplot(111)
+
+ ax.set_xlabel(x_label)
+ ax.set_ylabel(y_label)
+
+ if y_max is not None:
+ ax.set_ylim([0.0, y_max])
+
+ ax.grid(linewidth=0.25, linestyle=':', color='0', dashes=[0.2, 1.6])
+ ax.ticklabel_format(style='sci', scilimits=(4, 0), useMathText=True)
+ ax.tick_params(axis='both', width=0.75)
+
+ x = list(map(float, cols[0]))
+ for i, y in enumerate(cols[1::]):
+ ax.plot(x,
+ list(map(float, y)),
+ label=header[i + 1],
+ marker=next(markers),
+ dashes=next(dashes),
+ markersize=3.0,
+ linewidth=1.0)
+
+ plt.legend()
+ plt.savefig(out_filename, bbox_inches='tight', pad_inches=0.025)
+ plt.close()
+ sys.stderr.write('wrote {}\n'.format(out_filename))
+
+
def run(progs, n_min, n_max, step):
"Benchmark each program with n_min ... n_max statements"
with WorkingDirectory('build'):
@@ -56,13 +122,13 @@ def run(progs, n_min, n_max, step):
'memory': open('serdi-memory.txt', 'w')}
# Write TSV header for all output files
- for name, f in results.iteritems():
+ for name, f in results.items():
write_header(f, progs)
for n in range(n_min, n_max + step, step):
# Add first column (n) to rows
rows = {}
- for name, _ in results.iteritems():
+ for name, _ in results.items():
rows[name] = [str(n)]
# Run each program and fill rows with measurements
@@ -73,17 +139,30 @@ def run(progs, n_min, n_max, step):
proc = subprocess.Popen(
cmd.split(), stdout=out, stderr=subprocess.PIPE)
- time, memory = parse_time(proc.communicate()[1])
- rows['time'] += ['%.07f' % time]
+ time, memory = parse_time(proc.communicate()[1].decode())
+ rows['time'] += ['%.07f' % time]
rows['throughput'] += ['%d' % (n / time)]
- rows['memory'] += [str(memory)]
+ rows['memory'] += [str(memory)]
# Write rows to output files
- for name, f in results.iteritems():
+ for name, f in results.items():
f.write('\t'.join(rows[name]) + '\n')
- for name, _ in results.iteritems():
- sys.stderr.write('wrote build/serdi-%s.txt\n' % name)
+ for name, f in results.items():
+ tsv_filename = 'serdi-%s.txt' % name
+ sys.stderr.write('wrote %s\n' % tsv_filename)
+
+
+def plot_results():
+ "Plot all benchmark results"
+ with WorkingDirectory('build'):
+ plot(open('serdi-time.txt', 'r'), 'serdi-time.svg',
+ 'Statements', 'Time (s)')
+ plot(open('serdi-throughput.txt', 'r'), 'serdi-throughput.svg',
+ 'Statements', 'Statements / s')
+ plot(open('serdi-memory.txt', 'r'), 'serdi-memory.svg',
+ 'Statements', 'Bytes')
+
if __name__ == "__main__":
class OptParser(optparse.OptionParser):
@@ -94,17 +173,23 @@ if __name__ == "__main__":
usage='%prog [OPTION]... SP2B_DIR',
description='Benchmark RDF reading and writing commands\n',
epilog='''
-Example:
- %prog --max 100000 \\
- --run 'rapper -i turtle -o turtle' \\
- --run 'riot --output=ttl' \\
- --run 'rdfpipe -i turtle -o turtle' /path/to/sp2b/src/
- ''')
+Example:
+ %prog --max 100000 \\
+ --run 'rapper -i turtle -o turtle' \\
+ --run 'riot --output=ttl' \\
+ --run 'rdfpipe -i turtle -o turtle' /path/to/sp2b/src/
+''')
opt.add_option('--max', type='int', default=1000000,
help='maximum triple count')
opt.add_option('--run', type='string', action='append', default=[],
help='additional command to run (input file is appended)')
+ opt.add_option('--no-generate', action='store_true',
+ help='do not generate data')
+ opt.add_option('--no-execute', action='store_true',
+ help='do not run benchmarks')
+ opt.add_option('--no-plot', action='store_true',
+ help='do not plot benchmarks')
(options, args) = opt.parse_args()
if len(args) != 1:
@@ -112,9 +197,13 @@ Example:
sys.exit(1)
progs = ['serdi -b -f -i turtle -o turtle'] + options.run
- min_n = options.max / 10
+ min_n = int(options.max / 10)
max_n = options.max
- step = min_n
-
- gen(str(args[0]), min_n, max_n, step)
- run(progs, min_n, max_n, step)
+ step = min_n
+
+ if not options.no_generate:
+ gen(str(args[0]), min_n, max_n, step)
+ if not options.no_execute:
+ run(progs, min_n, max_n, step)
+ if not options.no_plot:
+ plot_results()