diff options
Diffstat (limited to 'scripts/serd_bench.py')
-rwxr-xr-x | scripts/serd_bench.py | 159 |
1 files changed, 94 insertions, 65 deletions
diff --git a/scripts/serd_bench.py b/scripts/serd_bench.py index b34545b0..0e309b15 100755 --- a/scripts/serd_bench.py +++ b/scripts/serd_bench.py @@ -12,6 +12,7 @@ import sys class WorkingDirectory: "Scoped context for changing working directory" + def __init__(self, working_dir): self.original_dir = os.getcwd() self.working_dir = working_dir @@ -28,34 +29,34 @@ class WorkingDirectory: def filename(n): "Filename for a generated file with n statements" - return 'gen%d.ttl' % n + return "gen%d.ttl" % n def gen(sp2b_dir, n_min, n_max, step): "Generate files with n_min ... n_max statements if they are not present" with WorkingDirectory(sp2b_dir) as dir: for n in range(n_min, n_max + step, step): - out_path = os.path.join(dir.original_dir, 'build', filename(n)) + out_path = os.path.join(dir.original_dir, "build", filename(n)) if not os.path.exists(out_path): - subprocess.call(['./sp2b_gen', '-t', str(n), out_path]) + subprocess.call(["./sp2b_gen", "-t", str(n), out_path]) def write_header(results, progs): "Write the header line for TSV output" - results.write('n') + results.write("n") for prog in progs: - results.write('\t' + os.path.basename(prog.split()[0])) - results.write('\n') + results.write("\t" + os.path.basename(prog.split()[0])) + results.write("\n") def parse_time(report): "Return user time and max RSS from a /usr/bin/time -v report" time = memory = None - for line in report.split('\n'): - if line.startswith('\tUser time'): - time = float(line[line.find(':') + 1:]) - elif line.startswith('\tMaximum resident set'): - memory = float(line[line.find(':') + 1:]) * 1024 + for line in report.split("\n"): + if line.startswith("\tUser time"): + time = float(line[line.find(":") + 1 :]) + elif line.startswith("\tMaximum resident set"): + memory = float(line[line.find(":") + 1 :]) * 1024 return (time, memory) @@ -65,9 +66,9 @@ def get_dashes(): dash = 2.0 space = dot = 0.75 - yield [] # Solid + yield [] # Solid yield [dash, space] # Dashed - yield [dot, space] # Dotted + yield [dot, space] # Dotted # Dash-dots, with increasing number of dots for each line for i in itertools.count(2): @@ -77,14 +78,14 @@ def get_dashes(): def plot(in_file, out_filename, x_label, y_label, y_max=None): "Plot a TSV file as SVG" - matplotlib.use('agg') + matplotlib.use("agg") import matplotlib.pyplot as plt fig_height = 4.0 dashes = get_dashes() - markers = itertools.cycle(['o', 's', 'v', 'D', '*', 'p', 'P', 'h', 'X']) + markers = itertools.cycle(["o", "s", "v", "D", "*", "p", "P", "h", "X"]) - reader = csv.reader(in_file, delimiter='\t') + reader = csv.reader(in_file, delimiter="\t") header = next(reader) cols = [x for x in zip(*list(reader))] @@ -98,32 +99,36 @@ def plot(in_file, out_filename, x_label, y_label, y_max=None): if y_max is not None: ax.set_ylim([0.0, y_max]) - ax.grid(linewidth=0.25, linestyle=':', color='0', dashes=[0.2, 1.6]) - ax.ticklabel_format(style='sci', scilimits=(4, 0), useMathText=True) - ax.tick_params(axis='both', width=0.75) + ax.grid(linewidth=0.25, linestyle=":", color="0", dashes=[0.2, 1.6]) + ax.ticklabel_format(style="sci", scilimits=(4, 0), useMathText=True) + ax.tick_params(axis="both", width=0.75) x = list(map(float, cols[0])) for i, y in enumerate(cols[1::]): - ax.plot(x, - list(map(float, y)), - label=header[i + 1], - marker=next(markers), - dashes=next(dashes), - markersize=3.0, - linewidth=1.0) + ax.plot( + x, + list(map(float, y)), + label=header[i + 1], + marker=next(markers), + dashes=next(dashes), + markersize=3.0, + linewidth=1.0, + ) plt.legend() - plt.savefig(out_filename, bbox_inches='tight', pad_inches=0.025) + plt.savefig(out_filename, bbox_inches="tight", pad_inches=0.025) plt.close() - sys.stderr.write('wrote {}\n'.format(out_filename)) + sys.stderr.write("wrote {}\n".format(out_filename)) def run(progs, n_min, n_max, step): "Benchmark each program with n_min ... n_max statements" - with WorkingDirectory('build'): - results = {'time': open('serdi-time.txt', 'w'), - 'throughput': open('serdi-throughput.txt', 'w'), - 'memory': open('serdi-memory.txt', 'w')} + with WorkingDirectory("build"): + results = { + "time": open("serdi-time.txt", "w"), + "throughput": open("serdi-throughput.txt", "w"), + "memory": open("serdi-memory.txt", "w"), + } # Write TSV header for all output files for name, f in results.items(): @@ -137,70 +142,94 @@ def run(progs, n_min, n_max, step): # Run each program and fill rows with measurements for prog in progs: - cmd = '/usr/bin/time -v ' + prog + ' ' + filename(n) - with open(filename(n) + '.out', 'w') as out: - sys.stderr.write(cmd + '\n') + cmd = "/usr/bin/time -v " + prog + " " + filename(n) + with open(filename(n) + ".out", "w") as out: + sys.stderr.write(cmd + "\n") proc = subprocess.Popen( - cmd.split(), stdout=out, stderr=subprocess.PIPE) + cmd.split(), stdout=out, stderr=subprocess.PIPE + ) time, memory = parse_time(proc.communicate()[1].decode()) - rows['time'] += ['%.07f' % time] - rows['throughput'] += ['%d' % (n / time)] - rows['memory'] += [str(memory)] + rows["time"] += ["%.07f" % time] + rows["throughput"] += ["%d" % (n / time)] + rows["memory"] += [str(memory)] # Write rows to output files for name, f in results.items(): - f.write('\t'.join(rows[name]) + '\n') + f.write("\t".join(rows[name]) + "\n") for name, f in results.items(): - tsv_filename = 'serdi-%s.txt' % name - sys.stderr.write('wrote %s\n' % tsv_filename) + tsv_filename = "serdi-%s.txt" % name + sys.stderr.write("wrote %s\n" % tsv_filename) def plot_results(): "Plot all benchmark results" - with WorkingDirectory('build'): - plot(open('serdi-time.txt', 'r'), 'serdi-time.svg', - 'Statements', 'Time (s)') - plot(open('serdi-throughput.txt', 'r'), 'serdi-throughput.svg', - 'Statements', 'Statements / s') - plot(open('serdi-memory.txt', 'r'), 'serdi-memory.svg', - 'Statements', 'Bytes') + with WorkingDirectory("build"): + plot( + open("serdi-time.txt", "r"), + "serdi-time.svg", + "Statements", + "Time (s)", + ) + plot( + open("serdi-throughput.txt", "r"), + "serdi-throughput.svg", + "Statements", + "Statements / s", + ) + plot( + open("serdi-memory.txt", "r"), + "serdi-memory.svg", + "Statements", + "Bytes", + ) if __name__ == "__main__": + class OptParser(optparse.OptionParser): def format_epilog(self, formatter): return self.expand_prog_name(self.epilog) opt = OptParser( - usage='%prog [OPTION]... SP2B_DIR', - description='Benchmark RDF reading and writing commands\n', - epilog=''' + usage="%prog [OPTION]... SP2B_DIR", + description="Benchmark RDF reading and writing commands\n", + epilog=""" Example: %prog --max 100000 \\ --run 'rapper -i turtle -o turtle' \\ --run 'riot --output=ttl' \\ --run 'rdfpipe -i turtle -o turtle' /path/to/sp2b/src/ -''') - - opt.add_option('--max', type='int', default=1000000, - help='maximum triple count') - opt.add_option('--run', type='string', action='append', default=[], - help='additional command to run (input file is appended)') - opt.add_option('--no-generate', action='store_true', - help='do not generate data') - opt.add_option('--no-execute', action='store_true', - help='do not run benchmarks') - opt.add_option('--no-plot', action='store_true', - help='do not plot benchmarks') +""", + ) + + opt.add_option( + "--max", type="int", default=1000000, help="maximum triple count" + ) + opt.add_option( + "--run", + type="string", + action="append", + default=[], + help="additional command to run (input file is appended)", + ) + opt.add_option( + "--no-generate", action="store_true", help="do not generate data" + ) + opt.add_option( + "--no-execute", action="store_true", help="do not run benchmarks" + ) + opt.add_option( + "--no-plot", action="store_true", help="do not plot benchmarks" + ) (options, args) = opt.parse_args() if len(args) != 1: opt.print_usage() sys.exit(1) - progs = ['serdi -b -f -i turtle -o turtle'] + options.run + progs = ["serdi -b -f -i turtle -o turtle"] + options.run min_n = int(options.max / 10) max_n = options.max step = min_n |