aboutsummaryrefslogtreecommitdiffstats
path: root/serd_bench.py
diff options
context:
space:
mode:
Diffstat (limited to 'serd_bench.py')
-rwxr-xr-xserd_bench.py120
1 files changed, 120 insertions, 0 deletions
diff --git a/serd_bench.py b/serd_bench.py
new file mode 100755
index 00000000..719fff3f
--- /dev/null
+++ b/serd_bench.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+
+import optparse
+import os
+import subprocess
+import sys
+
+class WorkingDirectory:
+ "Scoped context for changing working directory"
+ def __init__(self, working_dir):
+ self.original_dir = os.getcwd()
+ self.working_dir = working_dir
+
+ def __enter__(self):
+ os.chdir(self.working_dir)
+ return self
+
+ def __exit__(self, type, value, traceback):
+ os.chdir(self.original_dir)
+
+def filename(n):
+ "Filename for a generated file with n statements"
+ return 'gen%d.ttl' % n
+
+def gen(sp2b_dir, n_min, n_max, step):
+ "Generate files with n_min ... n_max statements if they are not present"
+ with WorkingDirectory(sp2b_dir) as dir:
+ for n in range(n_min, n_max + step, step):
+ out_path = os.path.join(dir.original_dir, 'build', filename(n))
+ if not os.path.exists(out_path):
+ subprocess.call(['sp2b_gen', '-t', str(n), out_path])
+
+def write_header(results, progs):
+ "Write the header line for TSV output"
+ results.write('n')
+ for prog in progs:
+ results.write('\t' + os.path.basename(prog.split()[0]))
+ results.write('\n')
+
+def parse_time(report):
+ "Return user time and max RSS from a /usr/bin/time -v report"
+ time = memory = None
+ for line in report.split('\n'):
+ if line.startswith('\tUser time'):
+ time = float(line[line.find(':') + 1:])
+ elif line.startswith('\tMaximum resident set'):
+ memory = float(line[line.find(':') + 1:]) * 1024
+
+ return (time, memory)
+
+def run(progs, n_min, n_max, step):
+ "Benchmark each program with n_min ... n_max statements"
+ with WorkingDirectory('build'):
+ results = {'time': open('serdi-time.txt', 'w'),
+ 'throughput': open('serdi-throughput.txt', 'w'),
+ 'memory': open('serdi-memory.txt', 'w')}
+
+ # Write TSV header for all output files
+ for name, f in results.iteritems():
+ write_header(f, progs)
+
+ for n in range(n_min, n_max + step, step):
+ # Add first column (n) to rows
+ rows = {}
+ for name, _ in results.iteritems():
+ rows[name] = [str(n)]
+
+ # Run each program and fill rows with measurements
+ for prog in progs:
+ cmd = '/usr/bin/time -v ' + prog + ' ' + filename(n)
+ with open(filename(n) + '.out', 'w') as out:
+ sys.stderr.write(cmd + '\n')
+ proc = subprocess.Popen(
+ cmd.split(), stdout=out, stderr=subprocess.PIPE)
+
+ time, memory = parse_time(proc.communicate()[1])
+ rows['time'] += ['%.07f' % time]
+ rows['throughput'] += ['%d' % (n / time)]
+ rows['memory'] += [str(memory)]
+
+ # Write rows to output files
+ for name, f in results.iteritems():
+ f.write('\t'.join(rows[name]) + '\n')
+
+ for name, _ in results.iteritems():
+ sys.stderr.write('wrote build/serdi-%s.txt\n' % name)
+
+if __name__ == "__main__":
+ class OptParser(optparse.OptionParser):
+ def format_epilog(self, formatter):
+ return self.expand_prog_name(self.epilog)
+
+ opt = OptParser(
+ usage='%prog [OPTION]... SP2B_DIR',
+ description='Benchmark RDF reading and writing commands\n',
+ epilog='''
+Example:
+ %prog --max 100000 \\
+ --run 'rapper -i turtle -o turtle' \\
+ --run 'riot --output=ttl' \\
+ --run 'rdfpipe -i turtle -o turtle' /path/to/sp2b/src/
+ ''')
+
+ opt.add_option('--max', type='int', default=1000000,
+ help='maximum triple count')
+ opt.add_option('--run', type='string', action='append', default=[],
+ help='additional command to run (input file is appended)')
+
+ (options, args) = opt.parse_args()
+ if len(args) != 1:
+ opt.print_usage()
+ sys.exit(1)
+
+ progs = ['serdi -b -f -i turtle -o turtle'] + options.run
+ min_n = options.max / 10
+ max_n = options.max
+ step = min_n
+
+ gen(str(args[0]), min_n, max_n, step)
+ run(progs, min_n, max_n, step)