From ed77fe14175e94138390f2ebf5348656e4421746 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 5 May 2023 20:57:24 -0400 Subject: Update benchmarks --- README.md | 2 +- doc/serdi-memory.svg | 258 ++++++++++++++++++++++---------------------- doc/serdi-throughput.svg | 270 +++++++++++++++++++++++++---------------------- doc/serdi-time.svg | 267 +++++++++++++++++++++++++--------------------- scripts/serd_bench.py | 44 ++++++-- 5 files changed, 453 insertions(+), 388 deletions(-) diff --git a/README.md b/README.md index d3ebbca6..fa15c878 100644 --- a/README.md +++ b/README.md @@ -47,8 +47,8 @@ Turtle data generated by [sp2b][] on an AMD 1950x. Of the three, `serdi` is the fastest by a wide margin, and the only one that uses a constant amount of memory for all input sizes. -![Time](doc/serdi-time.svg) ![Throughput](doc/serdi-throughput.svg) +![Time](doc/serdi-time.svg) ![Memory](doc/serdi-memory.svg) Documentation diff --git a/doc/serdi-memory.svg b/doc/serdi-memory.svg index 11d6ed1d..db837a07 100644 --- a/doc/serdi-memory.svg +++ b/doc/serdi-memory.svg @@ -1,82 +1,88 @@ - + - + - + - + - + - + - + - + - + - - - + + + - + - + - + - - - - - - - - - + + + + + + - - - + + + - + - + - + - - - + + + + + + + + + + + + - + - + - + @@ -97,61 +103,68 @@ - + - + - - - - + + + + - - + + - + - + - + - - - + + + + + + + - - - - - - - - + + + - + - + + + - - - - + + + + + + + + - - - - + + + + - + - + + + @@ -165,81 +178,66 @@ - - - - + - + - + - + - - - - - - - - - - - + + + + + + + - + - + - - - - - - - - - - - + + + + + + + - + - + - - - - - - - - - - - + + + + + + + - - - - + + + + - + - - + + - + @@ -252,10 +250,10 @@ - - + + - + @@ -267,10 +265,10 @@ - - + + - + @@ -283,8 +281,8 @@ - - + + diff --git a/doc/serdi-throughput.svg b/doc/serdi-throughput.svg index 33654985..e32c3355 100644 --- a/doc/serdi-throughput.svg +++ b/doc/serdi-throughput.svg @@ -1,82 +1,88 @@ - + - + - + - + - + - + - + - + - + - - - + + + - + - + - + - - - - - - - - - + + + + + + - - - + + + - + - + - + - - - + + + + + + + + + + + + - + - + - + @@ -97,44 +103,71 @@ - + - + - - - - + + + + - - + + - + - + - + - - - + + + + + + + - - - + + + + + + + + + + - - - + + + + + + + + + + + + + + + + + + + - + @@ -153,81 +186,66 @@ - - - - - - - - + + + + + - - + + - + - - - - - - - - - - - + + + + + + + - - + + - + - - - - - - - - - - - + + + + + + + - - + + - + - - - - - - - - - - - + + + + + + + - - - - + + + + - - - - + + + + - + @@ -239,11 +257,11 @@ - - - + + + - + @@ -254,11 +272,11 @@ - - - + + + - + @@ -271,8 +289,8 @@ - - + + diff --git a/doc/serdi-time.svg b/doc/serdi-time.svg index 30c1340e..3b8e5523 100644 --- a/doc/serdi-time.svg +++ b/doc/serdi-time.svg @@ -1,82 +1,88 @@ - + - + - + - + - + - + - + - + - + - - - + + + - + - + - + - - - - - - - - - + + + + + + - - - + + + - + - + - + - - - + + + + + + + + + + + + - + - + - + @@ -97,49 +103,78 @@ - + - + - - - - + + + + - - + + - + - + + + + + + - - - - + + + + + + + - - - + + + + + + + - - - + + + + + + + - - - + + + + + + + + + + + + + + + + - + @@ -155,78 +190,66 @@ - + - - + + - + - - - - - - - - - - - + + + + + + + - - + + - + - - - - - - - - - - - + + + + + + + - - + + - + - - - - - - - - - - - + + + + + + + - - - - + + + + - - - - + + + + - + @@ -237,11 +260,11 @@ - - - + + + - + @@ -252,11 +275,11 @@ - - - + + + - + @@ -269,8 +292,8 @@ - - + + diff --git a/scripts/serd_bench.py b/scripts/serd_bench.py index ae2f0f66..e342976b 100755 --- a/scripts/serd_bench.py +++ b/scripts/serd_bench.py @@ -41,6 +41,23 @@ class WorkingDirectory: os.chdir(self.original_dir) +def order_of_magnitude(values): + "Return the order of magnitude to use for an axis with the given values" + if len(values) <= 0: + return 0 + + # Calculate the "best" order of magnitude like ScalarFormatter does + val = max(values) + oom = math.floor(math.log10(max(1.0, val))) + if -3 <= oom <= 3: + return 0 + + # Round down to a sensible (thousand, millions, billions, etc) order + remainder = oom % 3 + oom = oom - remainder + return oom + + def filename(num): "Filename for a generated file with n statements" return "gen%d.ttl" % num @@ -73,7 +90,7 @@ def parse_time(report): if line.startswith("\tUser time"): time = float(line[after_colon:]) elif line.startswith("\tMaximum resident set"): - memory = float(line[after_colon:]) * 1024 + memory = int(float(line[after_colon:]) * 1024) return (time, memory) @@ -94,7 +111,6 @@ def get_dashes(): def plot(in_file, out_filename, x_label, y_label, y_max=None): "Plot a TSV file as SVG" - matplotlib.use("agg") import matplotlib.pyplot as plt @@ -108,22 +124,22 @@ def plot(in_file, out_filename, x_label, y_label, y_max=None): header = next(reader) cols = list(zip(*list(reader))) + # Create a figure with a grid plt.clf() fig = plt.figure(figsize=(fig_height * math.sqrt(2), fig_height)) ax = fig.add_subplot(111) - ax.set_xlabel(x_label) ax.set_ylabel(y_label) ax.grid(linewidth=0.25, linestyle=":", color="0", dashes=[0.2, 1.6]) - ax.ticklabel_format(style="sci", scilimits=(4, 0), useMathText=True) ax.tick_params(axis="both", width=0.75) x = list(map(float, cols[0])) actual_y_max = 0.0 for i, y in enumerate(cols[1::]): y_floats = list(map(float, y)) - actual_y_max = max(actual_y_max, y_floats) + y_floats_max = max(y_floats) + actual_y_max = max(actual_y_max, y_floats_max) ax.plot( x, y_floats, @@ -134,9 +150,17 @@ def plot(in_file, out_filename, x_label, y_label, y_max=None): linewidth=1.0, ) - y_max = actual_y_max if y_max is None else y_max + # Set Y axis limits to go from zero to the maximum value with a small pad + y_max = (1.025 * actual_y_max) if y_max is None else y_max ax.set_ylim([0.0, y_max]) + # Set axis magnitudes + x_m = (order_of_magnitude(x),) * 2 + y_m = (order_of_magnitude([y_max]),) * 2 + ax.ticklabel_format(axis="x", style="sci", scilimits=x_m, useMathText=True) + ax.ticklabel_format(axis="y", style="sci", scilimits=y_m, useMathText=True) + + # Save plot plt.legend(labelspacing=0.25) plt.savefig(out_filename, bbox_inches="tight", pad_inches=0.125) plt.close() @@ -215,7 +239,7 @@ if __name__ == "__main__": formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" example: - %(prog)s --max 100000 \\ + %(prog)s --max 300000 \\ --run 'rapper -i turtle -o turtle' \\ --run 'riot --output=ttl' \\ --run 'rdfpipe -i turtle -o turtle' /path/to/sp2b/src/ @@ -223,7 +247,7 @@ example: ) ap.add_argument( - "--max", type=int, default=1000000, help="maximum triple count" + "--max", type=int, default=3000000, help="maximum triple count" ) ap.add_argument( "--run", @@ -241,12 +265,14 @@ example: ap.add_argument( "--no-plot", action="store_true", help="do not plot benchmarks" ) + ap.add_argument("--steps", type=int, default=6, help="number of steps") + ap.add_argument("sp2b_dir", help="path to sp2b test data generator") args = ap.parse_args(sys.argv[1:]) progs = ["serdi -b -f -i turtle -o turtle"] + args.run - min_n = int(args.max / 10) + min_n = int(args.max / args.steps) max_n = args.max step = min_n -- cgit v1.2.1