diff --git a/benchmark/compare.R b/benchmark/compare.R
index 01beb380469305..1200340f329837 100644
--- a/benchmark/compare.R
+++ b/benchmark/compare.R
@@ -33,30 +33,39 @@ if (!is.null(plot.filename)) {
 
 # Print a table with results
 statistics = ddply(dat, "name", function(subdat) {
-  # Perform a statistics test to see of there actually is a difference in
-  # performace.
-  w = t.test(rate ~ binary, data=subdat);
+  old.rate = subset(subdat, binary == "old")$rate;
+  new.rate = subset(subdat, binary == "new")$rate;
 
   # Calculate improvement for the "new" binary compared with the "old" binary
-  new_mu = mean(subset(subdat, binary == "new")$rate);
-  old_mu = mean(subset(subdat, binary == "old")$rate);
-  improvement = sprintf("%.2f %%", ((new_mu - old_mu) / old_mu * 100));
+  old.mu = mean(old.rate);
+  new.mu = mean(new.rate);
+  improvement = sprintf("%.2f %%", ((new.mu - old.mu) / old.mu * 100));
 
-  # Add user friendly stars to the table. There should be at least one star
-  # before you can say that there is an improvement.
-  significant = '';
-  if (w$p.value < 0.001) {
-    significant = '***';
-  } else if (w$p.value < 0.01) {
-    significant = '**';
-  } else if (w$p.value < 0.05) {
-    significant = '*';
+  p.value = NA;
+  significant = 'NA';
+  # Only calculate the p-value when both binaries have more than one sample
+  if (length(old.rate) > 1 && length(new.rate) > 1) {
+    # Perform a statistics test to see if there actually is a difference in
+    # performance.
+    w = t.test(rate ~ binary, data=subdat);
+    p.value = w$p.value;
+
+    # Add user friendly stars to the table. There should be at least one star
+    # before you can say that there is an improvement.
+    significant = '';
+    if (p.value < 0.001) {
+      significant = '***';
+    } else if (p.value < 0.01) {
+      significant = '**';
+    } else if (p.value < 0.05) {
+      significant = '*';
+    }
   }
 
   r = list(
     improvement = improvement,
     significant = significant,
-    p.value = w$p.value
+    p.value = p.value
   );
   return(data.frame(r));
 });
diff --git a/benchmark/scatter.R b/benchmark/scatter.R
index 7b98611482dfb2..10e099e4308a1b 100644
--- a/benchmark/scatter.R
+++ b/benchmark/scatter.R
@@ -51,13 +51,17 @@ if (length(aggregate) > 0) {
 stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
   rate = subdat$rate;
 
-  # calculate standard error of the mean
-  se = sqrt(var(rate)/length(rate));
+  # calculate confidence interval of the mean (NA when only one sample)
+  ci = NA;
+  if (length(rate) > 1) {
+    se = sqrt(var(rate)/length(rate));
+    ci = se * qt(0.975, length(rate) - 1);
+  }
 
   # calculate mean and 95 % confidence interval
   r = list(
     rate = mean(rate),
-    confidence.interval = se * qt(0.975, length(rate) - 1)
+    confidence.interval = ci
   );
 
   return(data.frame(r));
@@ -66,11 +70,14 @@ stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
 print(stats, row.names=F);
 
 if (!is.null(plot.filename)) {
-  p = ggplot(stats, aes_string(x=x.axis.name, y='mean', colour=category.name));
+  p = ggplot(stats, aes_string(x=x.axis.name, y='rate', colour=category.name));
   if (use.log2) {
     p = p + scale_x_continuous(trans='log2');
   }
-  p = p + geom_errorbar(aes(ymin=mean-confidence.interval, ymax=mean+confidence.interval), width=.1);
+  p = p + geom_errorbar(
+    aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval),
+    width=.1, na.rm=TRUE
+  );
   p = p + geom_point();
   p = p + ylab("rate of operations (higher is better)");
   p = p + ggtitle(dat[1, 1]);