1 files changed, 94 insertions, 65 deletions
diff --git a/scripts/serd_bench.py b/scripts/serd_bench.py
index b34545b0..0e309b15 100755
--- a/scripts/serd_bench.py
+++ b/scripts/serd_bench.py
@@ -12,6 +12,7 @@ import sys
 
 class WorkingDirectory:
     "Scoped context for changing working directory"
+
     def __init__(self, working_dir):
         self.original_dir = os.getcwd()
         self.working_dir = working_dir
@@ -28,34 +29,34 @@ class WorkingDirectory:
 
 def filename(n):
     "Filename for a generated file with n statements"
-    return 'gen%d.ttl' % n
+    return "gen%d.ttl" % n
 
 
 def gen(sp2b_dir, n_min, n_max, step):
     "Generate files with n_min ... n_max statements if they are not present"
     with WorkingDirectory(sp2b_dir) as dir:
         for n in range(n_min, n_max + step, step):
-            out_path = os.path.join(dir.original_dir, 'build', filename(n))
+            out_path = os.path.join(dir.original_dir, "build", filename(n))
             if not os.path.exists(out_path):
-                subprocess.call(['./sp2b_gen', '-t', str(n), out_path])
+                subprocess.call(["./sp2b_gen", "-t", str(n), out_path])
 
 
 def write_header(results, progs):
     "Write the header line for TSV output"
-    results.write('n')
+    results.write("n")
     for prog in progs:
-        results.write('\t' + os.path.basename(prog.split()[0]))
-    results.write('\n')
+        results.write("\t" + os.path.basename(prog.split()[0]))
+    results.write("\n")
 
 
 def parse_time(report):
     "Return user time and max RSS from a /usr/bin/time -v report"
     time = memory = None
-    for line in report.split('\n'):
-        if line.startswith('\tUser time'):
-            time = float(line[line.find(':') + 1:])
-        elif line.startswith('\tMaximum resident set'):
-            memory = float(line[line.find(':') + 1:]) * 1024
+    for line in report.split("\n"):
+        if line.startswith("\tUser time"):
+            time = float(line[line.find(":") + 1 :])
+        elif line.startswith("\tMaximum resident set"):
+            memory = float(line[line.find(":") + 1 :]) * 1024
 
     return (time, memory)
 
@@ -65,9 +66,9 @@ def get_dashes():
     dash = 2.0
     space = dot = 0.75
 
-    yield []             # Solid
+    yield []  # Solid
     yield [dash, space]  # Dashed
-    yield [dot, space]   # Dotted
+    yield [dot, space]  # Dotted
 
     # Dash-dots, with increasing number of dots for each line
     for i in itertools.count(2):
@@ -77,14 +78,14 @@ def get_dashes():
 def plot(in_file, out_filename, x_label, y_label, y_max=None):
     "Plot a TSV file as SVG"
 
-    matplotlib.use('agg')
+    matplotlib.use("agg")
     import matplotlib.pyplot as plt
 
     fig_height = 4.0
     dashes = get_dashes()
-    markers = itertools.cycle(['o', 's', 'v', 'D', '*', 'p', 'P', 'h', 'X'])
+    markers = itertools.cycle(["o", "s", "v", "D", "*", "p", "P", "h", "X"])
 
-    reader = csv.reader(in_file, delimiter='\t')
+    reader = csv.reader(in_file, delimiter="\t")
     header = next(reader)
     cols = [x for x in zip(*list(reader))]
 
@@ -98,32 +99,36 @@ def plot(in_file, out_filename, x_label, y_label, y_max=None):
     if y_max is not None:
         ax.set_ylim([0.0, y_max])
 
-    ax.grid(linewidth=0.25, linestyle=':', color='0', dashes=[0.2, 1.6])
-    ax.ticklabel_format(style='sci', scilimits=(4, 0), useMathText=True)
-    ax.tick_params(axis='both', width=0.75)
+    ax.grid(linewidth=0.25, linestyle=":", color="0", dashes=[0.2, 1.6])
+    ax.ticklabel_format(style="sci", scilimits=(4, 0), useMathText=True)
+    ax.tick_params(axis="both", width=0.75)
 
     x = list(map(float, cols[0]))
     for i, y in enumerate(cols[1::]):
-        ax.plot(x,
-                list(map(float, y)),
-                label=header[i + 1],
-                marker=next(markers),
-                dashes=next(dashes),
-                markersize=3.0,
-                linewidth=1.0)
+        ax.plot(
+            x,
+            list(map(float, y)),
+            label=header[i + 1],
+            marker=next(markers),
+            dashes=next(dashes),
+            markersize=3.0,
+            linewidth=1.0,
+        )
 
     plt.legend()
-    plt.savefig(out_filename, bbox_inches='tight', pad_inches=0.025)
+    plt.savefig(out_filename, bbox_inches="tight", pad_inches=0.025)
     plt.close()
-    sys.stderr.write('wrote {}\n'.format(out_filename))
+    sys.stderr.write("wrote {}\n".format(out_filename))
 
 
 def run(progs, n_min, n_max, step):
     "Benchmark each program with n_min ... n_max statements"
-    with WorkingDirectory('build'):
-        results = {'time':       open('serdi-time.txt', 'w'),
-                   'throughput': open('serdi-throughput.txt', 'w'),
-                   'memory':     open('serdi-memory.txt', 'w')}
+    with WorkingDirectory("build"):
+        results = {
+            "time": open("serdi-time.txt", "w"),
+            "throughput": open("serdi-throughput.txt", "w"),
+            "memory": open("serdi-memory.txt", "w"),
+        }
 
         # Write TSV header for all output files
         for name, f in results.items():
@@ -137,70 +142,94 @@ def run(progs, n_min, n_max, step):
 
             # Run each program and fill rows with measurements
             for prog in progs:
-                cmd = '/usr/bin/time -v ' + prog + ' ' + filename(n)
-                with open(filename(n) + '.out', 'w') as out:
-                    sys.stderr.write(cmd + '\n')
+                cmd = "/usr/bin/time -v " + prog + " " + filename(n)
+                with open(filename(n) + ".out", "w") as out:
+                    sys.stderr.write(cmd + "\n")
                     proc = subprocess.Popen(
-                        cmd.split(), stdout=out, stderr=subprocess.PIPE)
+                        cmd.split(), stdout=out, stderr=subprocess.PIPE
+                    )
 
                     time, memory = parse_time(proc.communicate()[1].decode())
-                    rows['time'] += ['%.07f' % time]
-                    rows['throughput'] += ['%d' % (n / time)]
-                    rows['memory'] += [str(memory)]
+                    rows["time"] += ["%.07f" % time]
+                    rows["throughput"] += ["%d" % (n / time)]
+                    rows["memory"] += [str(memory)]
 
             # Write rows to output files
             for name, f in results.items():
-                f.write('\t'.join(rows[name]) + '\n')
+                f.write("\t".join(rows[name]) + "\n")
 
         for name, f in results.items():
-            tsv_filename = 'serdi-%s.txt' % name
-            sys.stderr.write('wrote %s\n' % tsv_filename)
+            tsv_filename = "serdi-%s.txt" % name
+            sys.stderr.write("wrote %s\n" % tsv_filename)
 
 
 def plot_results():
     "Plot all benchmark results"
-    with WorkingDirectory('build'):
-        plot(open('serdi-time.txt', 'r'), 'serdi-time.svg',
-             'Statements', 'Time (s)')
-        plot(open('serdi-throughput.txt', 'r'), 'serdi-throughput.svg',
-             'Statements', 'Statements / s')
-        plot(open('serdi-memory.txt', 'r'), 'serdi-memory.svg',
-             'Statements', 'Bytes')
+    with WorkingDirectory("build"):
+        plot(
+            open("serdi-time.txt", "r"),
+            "serdi-time.svg",
+            "Statements",
+            "Time (s)",
+        )
+        plot(
+            open("serdi-throughput.txt", "r"),
+            "serdi-throughput.svg",
+            "Statements",
+            "Statements / s",
+        )
+        plot(
+            open("serdi-memory.txt", "r"),
+            "serdi-memory.svg",
+            "Statements",
+            "Bytes",
+        )
 
 
 if __name__ == "__main__":
+
     class OptParser(optparse.OptionParser):
         def format_epilog(self, formatter):
             return self.expand_prog_name(self.epilog)
 
     opt = OptParser(
-        usage='%prog [OPTION]... SP2B_DIR',
-        description='Benchmark RDF reading and writing commands\n',
-        epilog='''
+        usage="%prog [OPTION]... SP2B_DIR",
+        description="Benchmark RDF reading and writing commands\n",
+        epilog="""
 Example:
   %prog --max 100000 \\
       --run 'rapper -i turtle -o turtle' \\
       --run 'riot --output=ttl' \\
       --run 'rdfpipe -i turtle -o turtle' /path/to/sp2b/src/
-''')
-
-    opt.add_option('--max', type='int', default=1000000,
-                   help='maximum triple count')
-    opt.add_option('--run', type='string', action='append', default=[],
-                   help='additional command to run (input file is appended)')
-    opt.add_option('--no-generate', action='store_true',
-                   help='do not generate data')
-    opt.add_option('--no-execute', action='store_true',
-                   help='do not run benchmarks')
-    opt.add_option('--no-plot', action='store_true',
-                   help='do not plot benchmarks')
+""",
+    )
+
+    opt.add_option(
+        "--max", type="int", default=1000000, help="maximum triple count"
+    )
+    opt.add_option(
+        "--run",
+        type="string",
+        action="append",
+        default=[],
+        help="additional command to run (input file is appended)",
+    )
+    opt.add_option(
+        "--no-generate", action="store_true", help="do not generate data"
+    )
+    opt.add_option(
+        "--no-execute", action="store_true", help="do not run benchmarks"
+    )
+    opt.add_option(
+        "--no-plot", action="store_true", help="do not plot benchmarks"
+    )
 
     (options, args) = opt.parse_args()
     if len(args) != 1:
         opt.print_usage()
         sys.exit(1)
 
-    progs = ['serdi -b -f -i turtle -o turtle'] + options.run
+    progs = ["serdi -b -f -i turtle -o turtle"] + options.run
     min_n = int(options.max / 10)
     max_n = options.max
     step = min_n