From 855009af7f46c3fb7c5653713a0930e0b9c33f45 Mon Sep 17 00:00:00 2001
From: Andreas Madsen
Date: Mon, 1 Feb 2016 22:16:55 +0100
Subject: [PATCH] benchmark: use t-test for comparing node versions

The data sampling is done in node and the data processing is done in R.
Only plyr was added as an R dependency, and it is fairly standard.

PR-URL: https://github.com/nodejs/node/pull/7094
Reviewed-By: Trevor Norris
Reviewed-By: Jeremiah Senkpiel
Reviewed-By: Brian White
Reviewed-By: Anna Henningsen
---
 benchmark/_cli.R     |  24 +++++
 benchmark/compare.R  |  70 +++++++++++++
 benchmark/compare.js | 229 +++++++++++++------------------------
 3 files changed, 161 insertions(+), 162 deletions(-)
 create mode 100644 benchmark/_cli.R
 create mode 100644 benchmark/compare.R

diff --git a/benchmark/_cli.R b/benchmark/_cli.R
new file mode 100644
index 00000000000000..522f65653fd6df
--- /dev/null
+++ b/benchmark/_cli.R
@@ -0,0 +1,24 @@
+
+args = commandArgs(TRUE);
+
+args.options = list();
+
+temp.option.key = NULL;
+
+for (arg in args) {
+  # Optional arguments declaration
+  if (substring(arg, 1, 1) == '-') {
+    temp.option.key = substring(arg, 2);
+    if (substring(arg, 2, 2) == '-') {
+      temp.option.key = substring(arg, 3);
+    }
+
+    args.options[[temp.option.key]] = TRUE;
+  }
+  # Optional arguments value
+  else if (!is.null(temp.option.key)) {
+    args.options[[temp.option.key]] = arg;
+
+    temp.option.key = NULL;
+  }
+}
diff --git a/benchmark/compare.R b/benchmark/compare.R
new file mode 100644
index 00000000000000..01beb380469305
--- /dev/null
+++ b/benchmark/compare.R
@@ -0,0 +1,70 @@
+#!/usr/bin/env Rscript
+library(ggplot2);
+library(plyr);
+
+# get __dirname and load ./_cli.R
+args = commandArgs(trailingOnly = F);
+dirname = dirname(sub("--file=", "", args[grep("--file", args)]));
+source(paste0(dirname, '/_cli.R'), chdir=T);
+
+if (!is.null(args.options$help) ||
+    (!is.null(args.options$plot) && args.options$plot == TRUE)) {
+  stop("usage: cat file.csv | Rscript compare.R
+  --help           show this message
+  --plot filename  save plot to filename");
+}
+
+plot.filename = args.options$plot;
+
+dat = read.csv(file('stdin'));
+dat = data.frame(dat);
+dat$nameTwoLines = paste0(dat$filename, '\n', dat$configuration);
+dat$name = paste0(dat$filename, dat$configuration);
+
+# Create a box plot
+if (!is.null(plot.filename)) {
+  p = ggplot(data=dat);
+  p = p + geom_boxplot(aes(x=nameTwoLines, y=rate, fill=binary));
+  p = p + ylab("rate of operations (higher is better)");
+  p = p + xlab("benchmark");
+  p = p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5));
+  ggsave(plot.filename, p);
+}
+
+# Print a table with results
+statistics = ddply(dat, "name", function(subdat) {
+  # Perform a statistical test to see if there actually is a difference in
+  # performance.
+  w = t.test(rate ~ binary, data=subdat);
+
+  # Calculate improvement for the "new" binary compared with the "old" binary
+  new_mu = mean(subset(subdat, binary == "new")$rate);
+  old_mu = mean(subset(subdat, binary == "old")$rate);
+  improvement = sprintf("%.2f %%", ((new_mu - old_mu) / old_mu * 100));
+
+  # Add user-friendly stars to the table. There should be at least one star
+  # before you can say that there is an improvement.
+  significant = '';
+  if (w$p.value < 0.001) {
+    significant = '***';
+  } else if (w$p.value < 0.01) {
+    significant = '**';
+  } else if (w$p.value < 0.05) {
+    significant = '*';
+  }
+
+  r = list(
+    improvement = improvement,
+    significant = significant,
+    p.value = w$p.value
+  );
+  return(data.frame(r));
+});
+
+
+# Set the benchmark names as the row.names to left align them in the print
+row.names(statistics) = statistics$name;
+statistics$name = NULL;
+
+options(width = 200);
+print(statistics);
diff --git a/benchmark/compare.js b/benchmark/compare.js
index 4faa8f8638becd..fb179e0e4703ed 100644
--- a/benchmark/compare.js
+++ b/benchmark/compare.js
@@ -1,181 +1,86 @@
 'use strict';
-var usage = 'node benchmark/compare.js ' +
-            '<node-binary1> <node-binary2> ' +
-            '[--html] [--red|-r] [--green|-g] ' +
-            '[-- <type> [testFilter]]';
-var show = 'both';
-var nodes = [];
-var html = false;
-var benchmarks;
+const fork = require('child_process').fork;
+const path = require('path');
+const CLI = require('./_cli.js');
+
+//
+// Parse arguments
+//
+const cli = CLI(`usage: ./node compare.js [options] [--] <category> ...
+  Run each benchmark in the <category> directory many times using two different
+  node versions. More than one <category> directory can be specified.
+  The output is formatted as csv, which can be processed using for
+  example 'compare.R'.
+
+  --new      ./new-node-binary  new node binary (required)
+  --old      ./old-node-binary  old node binary (required)
+  --runs     30                 number of samples
+  --filter   pattern            string to filter benchmark scripts
+  --set      variable=value     set benchmark variable (can be repeated)
+`, {
+  arrayArgs: ['set']
+});
+
+if (!cli.optional.new || !cli.optional.old) {
+  cli.abort(cli.usage);
+  return;
+}

-for (var i = 2; i < process.argv.length; i++) {
-  var arg = process.argv[i];
-  switch (arg) {
-    case '--red': case '-r':
-      show = show === 'green' ? 'both' : 'red';
-      break;
-    case '--green': case '-g':
-      show = show === 'red' ? 'both' : 'green';
-      break;
-    case '--html':
-      html = true;
-      break;
-    case '-h': case '-?': case '--help':
-      console.log(usage);
-      process.exit(0);
-      break;
-    case '--':
-      benchmarks = [];
-      break;
-    default:
-      if (Array.isArray(benchmarks))
-        benchmarks.push(arg);
-      else
-        nodes.push(arg);
-      break;
-  }
+const binaries = ['old', 'new'];
+const runs = cli.optional.runs ? parseInt(cli.optional.runs, 10) : 30;
+const benchmarks = cli.benchmarks();
+
+if (benchmarks.length === 0) {
+  console.error('no benchmarks found');
+  process.exit(1);
 }

-var start, green, red, reset, end;
-if (!html) {
-  start = '';
-  green = '\u001b[1;32m';
-  red = '\u001b[1;31m';
-  reset = '\u001b[m';
-  end = '';
-} else {
-  start = '<pre style="background-color:#333;color:#eee">';
-  green = '<span style="background-color:#0f0;color:#000">';
-  red = '<span style="background-color:#f00;color:#fff">';
-  reset = '</span>';
-  end = '</pre>';
+// Create queue from the benchmarks list such that both node versions are
+// tested `runs` amount of times each.
+const queue = [];
+for (let iter = 0; iter < runs; iter++) {
+  for (const filename of benchmarks) {
+    for (const binary of binaries) {
+      queue.push({ binary, filename, iter });
+    }
+  }
 }

-var runBench = process.env.NODE_BENCH || 'bench';
+// Print csv header
+console.log('"binary", "filename", "configuration", "rate", "time"');

-if (nodes.length !== 2)
-  return console.error('usage:\n %s', usage);
+(function recursive(i) {
+  const job = queue[i];

-var spawn = require('child_process').spawn;
-var results = {};
-var toggle = 1;
-var r = (+process.env.NODE_BENCH_RUNS || 1) * 2;
+  const child = fork(path.resolve(__dirname, job.filename), cli.optional.set, {
+    execPath: cli.optional[job.binary]
+  });

-run();
-function run() {
-  if (--r < 0)
-    return compare();
-  toggle = ++toggle % 2;
+  child.on('message', function(data) {
+    // Construct configuration string, " A=a, B=b, ..."
+    let conf = '';
+    for (const key of Object.keys(data.conf)) {
+      conf += ' ' + key + '=' + JSON.stringify(data.conf[key]);
+    }
+    conf = conf.slice(1);

-  var node = nodes[toggle];
-  console.error('running %s', node);
-  var env = {};
-  for (var i in process.env)
-    env[i] = process.env[i];
-  env.NODE = node;
+    // Escape quotes (") for correct csv formatting
+    conf = conf.replace(/"/g, '""');

-  var out = '';
-  var child;
-  if (Array.isArray(benchmarks) && benchmarks.length) {
-    child = spawn(
-      node,
-      ['benchmark/run.js'].concat(benchmarks),
-      { env: env }
-    );
-  } else {
-    child = spawn('make', [runBench], { env: env });
-  }
-  child.stdout.setEncoding('utf8');
-  child.stdout.on('data', function(c) {
-    out += c;
+    console.log(`"${job.binary}", "${job.filename}", "${conf}", ` +
+                `${data.rate}, ${data.time}`);
   });

-  child.stderr.pipe(process.stderr);
-
-  child.on('close', function(code) {
+  child.once('close', function(code) {
     if (code) {
-      console.error('%s exited with code=%d', node, code);
       process.exit(code);
-    } else {
-      out.trim().split(/\r?\n/).forEach(function(line) {
-        line = line.trim();
-        if (!line)
-          return;
-
-        var s = line.split(':');
-        var num = +s.pop();
-        if (!num && num !== 0)
-          return;
-
-        line = s.join(':');
-        var res = results[line] = results[line] || {};
-        res[node] = res[node] || [];
-        res[node].push(num);
-      });
-
-      run();
-    }
-  });
-}
-
-function compare() {
-  // each result is an object with {"foo.js arg=bar":12345,...}
-  // compare each thing, and show which node did the best.
-  // node[0] is shown in green, node[1] shown in red.
-  var maxLen = -Infinity;
-  var util = require('util');
-  console.log(start);
-
-  Object.keys(results).map(function(bench) {
-    var res = results[bench];
-    var n0 = avg(res[nodes[0]]);
-    var n1 = avg(res[nodes[1]]);
-
-    var pct = ((n0 - n1) / n1 * 100).toFixed(2);
-
-    var g = n0 > n1 ? green : '';
-    var r = n0 > n1 ? '' : red;
-    var c = r || g;
-
-    if (show === 'green' && !g || show === 'red' && !r) return;
+    }

-    var r0 = util.format(
-      '%s%s: %d%s',
-      g,
-      nodes[0],
-      n0.toPrecision(5), g ? reset : ''
-    );
-    var r1 = util.format(
-      '%s%s: %d%s',
-      r,
-      nodes[1],
-      n1.toPrecision(5), r ? reset : ''
-    );
-    pct = c + pct + '%' + reset;
-    var l = util.format('%s: %s %s', bench, r0, r1);
-    maxLen = Math.max(l.length + pct.length, maxLen);
-    return [l, pct];
-  }).filter(function(l) {
-    return l;
-  }).forEach(function(line) {
-    var l = line[0];
-    var pct = line[1];
-    var dotLen = maxLen - l.length - pct.length + 2;
-    var dots = ' ' + new Array(Math.max(0, dotLen)).join('.') + ' ';
-    console.log(l + dots + pct);
+    // If there are more benchmarks, execute the next one
+    if (i + 1 < queue.length) {
+      recursive(i + 1);
+    }
   });
-  console.log(end);
-}
-
-function avg(list) {
-  if (list.length >= 3) {
-    list = list.sort();
-    var q = Math.floor(list.length / 4) || 1;
-    list = list.slice(q, -q);
-  }
-  return list.reduce(function(a, b) {
-    return a + b;
-  }, 0) / list.length;
-}
+})(0);
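
A minimal usage sketch of the workflow this patch introduces, assuming locally
built `./node-old` and `./node-new` binaries and an existing benchmark category
directory such as `buffers` (the binary paths and category name here are
illustrative, not part of the patch):

  $ ./node-new benchmark/compare.js --old ./node-old --new ./node-new buffers > compare.csv
  $ cat compare.csv | Rscript benchmark/compare.R --plot compare.png

compare.js forks each benchmark `--runs` times per binary and emits one csv row
per sample; compare.R reads that csv from stdin, runs a t-test per benchmark
configuration, and prints the improvement, significance stars, and p-value.
The optional --plot flag additionally saves the ggplot2 box plot to the given
filename.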