From 0c0f34e2fe0e73fdbad3c768a7497032f91f5b20 Mon Sep 17 00:00:00 2001
From: Andreas Madsen <amwebdk@gmail.com>
Date: Sun, 7 Feb 2016 18:01:39 +0100
Subject: [PATCH] benchmark: add script for creating scatter plot

Previously this was a tool in `plot.R`. It is now a more complete tool
which executes the benchmarks many times and creates a scatter plot.

PR-URL: https://github.com/nodejs/node/pull/7094
Reviewed-By: Trevor Norris <trev.norris@gmail.com>
Reviewed-By: Jeremiah Senkpiel <fishrock123@rocketmail.com>
Reviewed-By: Brian White <mscdex@mscdex.net>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
---
 benchmark/scatter.R  | 78 ++++++++++++++++++++++++++++++++++++++++++++
 benchmark/scatter.js | 73 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 151 insertions(+)
 create mode 100644 benchmark/scatter.R
 create mode 100644 benchmark/scatter.js
diff --git a/benchmark/scatter.R b/benchmark/scatter.R
new file mode 100644
index 00000000000000..7b98611482dfb2
--- /dev/null
+++ b/benchmark/scatter.R
@@ -0,0 +1,78 @@
+#!/usr/bin/env Rscript
+library(ggplot2);
+library(plyr);
+
+# Derive this script's directory from the --file= argument and load ./_cli.R
+args = commandArgs(trailingOnly = F);
+dirname = dirname(sub("--file=", "", args[grep("--file", args)]));
+source(paste0(dirname, '/_cli.R'), chdir=T);  # presumably defines args.options
+# Stop with usage if xaxis/category is unset or plot is TRUE (no filename?)
+if (is.null(args.options$xaxis) || is.null(args.options$category) ||
+   (!is.null(args.options$plot) && args.options$plot == TRUE)) {
+  stop("usage: cat file.csv | Rscript scatter.R [variable=value ...]
+  --xaxis    variable   variable name to use as xaxis (required)
+  --category variable   variable name to use as colored category (required)
+  --plot     filename   save plot to filename
+  --log                 use a log-2 scale for xaxis in the plot");
+}
+
+# Read the plot settings from args.options; plot.filename stays NULL when
+# no plot option is present, and use.log2 is TRUE whenever a log option is.
+x.axis.name <- args.options$xaxis;
+category.name <- args.options$category;
+plot.filename <- args.options$plot;
+use.log2 <- !is.null(args.options$log);
+
+# Load the CSV from stdin as a data frame, stripping the white space that
+# surrounds unquoted fields.
+dat <- data.frame(read.csv(file('stdin'), strip.white=TRUE));
+
+# Columns that are neither measurements, the filename, nor one of the two
+# plotted variables are candidates for being aggregated over.
+aggregate <- setdiff(
+  names(dat),
+  c('rate', 'time', 'filename', x.axis.name, category.name)
+);
+
+# A candidate whose column holds exactly one distinct value does not change
+# between rows, so it is dropped from the list.
+varies <- function(key) {
+  length(unique(dat[[key]])) != 1
+};
+aggregate <- Filter(varies, aggregate);
+
+# Report each remaining variable, followed by one blank separator line
+cat(sprintf('aggregating variable: %s\n', aggregate), sep='');
+if (length(aggregate) > 0) {
+  cat('\n');
+}
+
+# Summarise every (x-axis, category) group as its mean rate together with the
+# half-width of a 95 % confidence interval based on the t-distribution.
+describe.group <- function(subdat) {
+  samples <- subdat$rate;
+  n <- length(samples);
+
+  # standard error of the mean
+  std.error <- sqrt(var(samples) / n);
+
+  data.frame(
+    rate = mean(samples),
+    confidence.interval = std.error * qt(0.975, n - 1)
+  )
+};
+stats <- ddply(dat, c(x.axis.name, category.name), describe.group);
+
+print(stats, row.names=F);
+
+if (!is.null(plot.filename)) {
+  # `stats` stores the per-group mean in its `rate` column, so map y to that
+  p = ggplot(stats, aes_string(x=x.axis.name, y='rate', colour=category.name));
+  if (use.log2) p = p + scale_x_continuous(trans='log2');
+  p = p + geom_errorbar(
+    aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval), width=.1
+  );
+  p = p + geom_point() + ylab("rate of operations (higher is better)");
+  p = p + ggtitle(dat[1, 1]);  # title: first cell of the input data
+  ggsave(plot.filename, p);
+}
diff --git a/benchmark/scatter.js b/benchmark/scatter.js
new file mode 100644
index 00000000000000..3003616b58eee4
--- /dev/null
+++ b/benchmark/scatter.js
@@ -0,0 +1,73 @@
+'use strict';
+
+const fork = require('child_process').fork;
+const path = require('path');
+const CLI = require('./_cli.js');
+
+// Parse the command line. `--set` may be repeated, so it is declared as an
+// array argument. Exactly one positional <filename> is required; otherwise
+// abort with the usage text.
+const cli = CLI(`usage: ./node scatter.js [options] [--] <filename>
+  Run the benchmark script <filename> many times and output the rate (ops/s)
+  together with the benchmark variables as a csv.
+
+  --runs 30              number of samples
+  --set  variable=value  set benchmark variable (can be repeated)
+`, {
+  arrayArgs: ['set']
+});
+
+if (cli.items.length !== 1) {
+  cli.abort(cli.usage);
+  return;
+}
+
+// Resolve the benchmark script to an absolute path. `name` is that path with
+// a `__dirname`-sized prefix plus one separator cut off; `runs` defaults to 30.
+const filepath = path.resolve(cli.items[0]);
+const name = filepath.substring(__dirname.length + 1);
+const runs = cli.optional.runs ? Number.parseInt(cli.optional.runs, 10) : 30;
+
+let printHeader = true;
+
+function csvEncodeValue(value) {
+  // Numbers are written bare; anything else is handled as a string: wrapped
+  // in double quotes, with every embedded double quote doubled.
+  if (typeof value === 'number') return value.toString();
+  const escaped = value.replace(/"/g, '""');
+  return `"${escaped}"`;
+}
+
+(function recursive(i) {
+  // Each run gets a fresh child process; the `--set` values are forwarded as
+  // its command-line arguments.
+  const child = fork(path.resolve(__dirname, filepath), cli.optional.set);
+
+  child.on('message', (data) => {
+    const keys = Object.keys(data.conf);
+
+    // The csv header is written once, ahead of the first data row.
+    if (printHeader) {
+      const confHeader = keys.map(csvEncodeValue).join(', ');
+      console.log(`"filename", ${confHeader}, "rate", "time"`);
+      printHeader = false;
+    }
+
+    // One csv row per message: filename, conf values, rate and time.
+    const confData = keys
+      .map((key) => csvEncodeValue(data.conf[key]))
+      .join(', ');
+    console.log(`"${name}", ${confData}, ${data.rate}, ${data.time}`);
+  });
+
+  child.once('close', (code) => {
+    // A truthy exit code from the child ends this process with that code.
+    if (code) {
+      process.exit(code);
+      return;
+    }
+
+    // Start the next run only after this one has closed, until `runs` is hit.
+    if (i + 1 < runs) recursive(i + 1);
+  });
+})(0);