From c41c5829c75cc0e4d110cdcc2fcef68390003e14 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Wed, 13 Apr 2022 08:13:21 +0200 Subject: [PATCH] extend ResultsComparer with what is needed to generate monthly perf report (#2362) * switch to System.CommandLine * remove unused features * extend DTOs with Metrics * move common logic to Helper type * move existing logic to TwoInputsComparer * introduce MultipleInputsComparer that produces the matrix * update docs --- .../ResultsComparer/CommandLineOptions.cs | 57 --- .../ResultsComparer/DataTransferContracts.cs | 24 +- src/tools/ResultsComparer/Helper.cs | 55 +++ .../ResultsComparer/MultipleInputsComparer.cs | 239 ++++++++++++ .../ResultsComparer/MultipleInputsOptions.cs | 20 + src/tools/ResultsComparer/Program.cs | 349 ++++++------------ src/tools/ResultsComparer/README.md | 43 +++ .../ResultsComparer/ResultsComparer.csproj | 4 +- src/tools/ResultsComparer/Stats.cs | 121 ++++++ .../ResultsComparer/TwoInputsComparer.cs | 141 +++++++ src/tools/ResultsComparer/TwoInputsOptions.cs | 20 + 11 files changed, 775 insertions(+), 298 deletions(-) delete mode 100644 src/tools/ResultsComparer/CommandLineOptions.cs create mode 100644 src/tools/ResultsComparer/Helper.cs create mode 100644 src/tools/ResultsComparer/MultipleInputsComparer.cs create mode 100644 src/tools/ResultsComparer/MultipleInputsOptions.cs create mode 100644 src/tools/ResultsComparer/Stats.cs create mode 100644 src/tools/ResultsComparer/TwoInputsComparer.cs create mode 100644 src/tools/ResultsComparer/TwoInputsOptions.cs diff --git a/src/tools/ResultsComparer/CommandLineOptions.cs b/src/tools/ResultsComparer/CommandLineOptions.cs deleted file mode 100644 index b668e804dc1..00000000000 --- a/src/tools/ResultsComparer/CommandLineOptions.cs +++ /dev/null @@ -1,57 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
-// See the LICENSE file in the project root for more information. - -using System.Collections.Generic; -using System.IO; -using CommandLine; -using CommandLine.Text; - -namespace ResultsComparer -{ - public class CommandLineOptions - { - [Option("base", HelpText = "Path to the folder/file with base results.")] - public string BasePath { get; set; } - - [Option("diff", HelpText = "Path to the folder/file with diff results.")] - public string DiffPath { get; set; } - - [Option("threshold", Required = true, HelpText = "Threshold for Statistical Test. Examples: 5%, 10ms, 100ns, 1s.")] - public string StatisticalTestThreshold { get; set; } - - [Option("noise", HelpText = "Noise threshold for Statistical Test. The difference for 1.0ns and 1.1ns is 10%, but it's just a noise. Examples: 0.5ns 1ns.", Default = "0.3ns" )] - public string NoiseThreshold { get; set; } - - [Option("top", HelpText = "Filter the diff to top/bottom N results. Optional.")] - public int? TopCount { get; set; } - - [Option("csv", HelpText = "Path to exported CSV results. Optional.")] - public FileInfo CsvPath { get; set; } - - [Option("xml", HelpText = "Path to exported XML results. Optional.")] - public FileInfo XmlPath { get; set; } - - [Option('f', "filter", HelpText = "Filter the benchmarks by name using glob pattern(s). Optional.")] - public IEnumerable Filters { get; set; } - - [Option("full-id", HelpText = "Display the full benchmark name id. 
Optional.")] - public bool FullId { get; set; } - - [Usage(ApplicationAlias = "")] - public static IEnumerable Examples - { - get - { - yield return new Example(@"Compare the results stored in 'C:\results\win' (base) vs 'C:\results\unix' (diff) using 5% threshold.", - new CommandLineOptions { BasePath = @"C:\results\win", DiffPath = @"C:\results\unix", StatisticalTestThreshold = "5%" }); - yield return new Example(@"Compare the results stored in 'C:\results\win' (base) vs 'C:\results\unix' (diff) using 5% threshold and show only top/bottom 10 results.", - new CommandLineOptions { BasePath = @"C:\results\win", DiffPath = @"C:\results\unix", StatisticalTestThreshold = "5%", TopCount = 10 }); - yield return new Example(@"Compare the results stored in 'C:\results\win' (base) vs 'C:\results\unix' (diff) using 5% threshold and 0.5ns noise filter.", - new CommandLineOptions { BasePath = @"C:\results\win", DiffPath = @"C:\results\unix", StatisticalTestThreshold = "5%", NoiseThreshold = "0.5ns" }); - yield return new Example(@"Compare the System.Math benchmark results stored in 'C:\results\ubuntu16' (base) vs 'C:\results\ubuntu18' (diff) using 5% threshold.", - new CommandLineOptions { Filters = new[] { "System.Math*" }, BasePath = @"C:\results\win", DiffPath = @"C:\results\unix", StatisticalTestThreshold = "5%" }); - } - } - } -} \ No newline at end of file diff --git a/src/tools/ResultsComparer/DataTransferContracts.cs b/src/tools/ResultsComparer/DataTransferContracts.cs index 2f9e7ac7ea9..5136e0e7fcb 100644 --- a/src/tools/ResultsComparer/DataTransferContracts.cs +++ b/src/tools/ResultsComparer/DataTransferContracts.cs @@ -5,7 +5,6 @@ // using System.Collections.Generic; -using System.Linq; namespace DataTransferContracts // generated with http://json2csharp.com/# { @@ -60,7 +59,7 @@ public class Percentiles public class Statistics { - public List OriginalValues { get; set; } + public double[] OriginalValues { get; set; } public int N { get; set; } public double Min { 
get; set; } public double LowerFence { get; set; } @@ -101,10 +100,28 @@ public class Measurement public double Nanoseconds { get; set; } } + public class Metric + { + public double Value { get; set; } + public MetricDescriptor Descriptor { get; set; } + } + + public class MetricDescriptor + { + public string Id { get; set; } + public string DisplayName { get; set; } + public string Legend { get; set; } + public string NumberFormat { get; set; } + public int UnitType { get; set; } + public string Unit { get; set; } + public bool TheGreaterTheBetter { get; set; } + public int PriorityInCategory { get; set; } + } + public class Benchmark { public string DisplayInfo { get; set; } - public object Namespace { get; set; } + public string Namespace { get; set; } public string Type { get; set; } public string Method { get; set; } public string MethodTitle { get; set; } @@ -113,6 +130,7 @@ public class Benchmark public Statistics Statistics { get; set; } public Memory Memory { get; set; } public List Measurements { get; set; } + public List Metrics { get; set; } } public class BdnResult diff --git a/src/tools/ResultsComparer/Helper.cs b/src/tools/ResultsComparer/Helper.cs new file mode 100644 index 00000000000..e3939f6ded0 --- /dev/null +++ b/src/tools/ResultsComparer/Helper.cs @@ -0,0 +1,55 @@ +using DataTransferContracts; +using Newtonsoft.Json; +using Perfolizer.Mathematics.Multimodality; +using System; +using System.IO; + +namespace ResultsComparer +{ + internal static class Helper + { + internal const string FullBdnJsonFileExtension = "full.json"; + + internal static string[] GetFilesToParse(string path) + { + if (Directory.Exists(path)) + return Directory.GetFiles(path, $"*{Helper.FullBdnJsonFileExtension}", SearchOption.AllDirectories); + else if (File.Exists(path) || !path.EndsWith(Helper.FullBdnJsonFileExtension)) + return new[] { path }; + else + throw new FileNotFoundException($"Provided path does NOT exist or is not a {path} file", path); + } + + // code and 
magic values taken from BenchmarkDotNet.Analysers.MultimodalDistributionAnalyzer + // See http://www.brendangregg.com/FrequencyTrails/modes.html + internal static string GetModalInfo(Benchmark benchmark) + { + if (benchmark.Statistics.N < 12) // not enough data to tell + return null; + + double mValue = MValueCalculator.Calculate(benchmark.Statistics.OriginalValues); + if (mValue > 4.2) + return "multimodal"; + else if (mValue > 3.2) + return "bimodal"; + else if (mValue > 2.8) + return "several?"; + + return null; + } + + internal static BdnResult ReadFromFile(string resultFilePath) + { + try + { + return JsonConvert.DeserializeObject(File.ReadAllText(resultFilePath)); + } + catch (JsonSerializationException) + { + Console.WriteLine($"Exception while reading the {resultFilePath} file."); + + throw; + } + } + } +} diff --git a/src/tools/ResultsComparer/MultipleInputsComparer.cs b/src/tools/ResultsComparer/MultipleInputsComparer.cs new file mode 100644 index 00000000000..9228c21a136 --- /dev/null +++ b/src/tools/ResultsComparer/MultipleInputsComparer.cs @@ -0,0 +1,239 @@ +using DataTransferContracts; +using MarkdownLog; +using Perfolizer.Mathematics.SignificanceTesting; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; + +namespace ResultsComparer +{ + internal static class MultipleInputsComparer + { + internal static void Compare(MultipleInputsOptions args) + { + Console.WriteLine("# Legend"); + Console.WriteLine(); + Console.WriteLine($"* Statistical Test threshold: {args.StatisticalTestThreshold}, the noise filter: {args.NoiseThreshold}"); + Console.WriteLine("* Result is conslusion: Slower|Faster|Same"); + Console.WriteLine("* Base is median base execution time in nanoseconds"); + Console.WriteLine("* Diff is median diff execution time in nanoseconds"); + Console.WriteLine("* Ratio = Base/Diff (the higher the better)"); + Console.WriteLine("* Alloc Delta = Allocated bytes diff - Allocated bytes base (the 
lower the better)"); + Console.WriteLine("* Base V = Base Runtime Version"); + Console.WriteLine("* Diff V = Diff Runtime Version"); + Console.WriteLine(); + + Stats stats = new Stats(); + + foreach (var benchmarkResults in args.BasePaths + .SelectMany((basePath, index) => GetResults(basePath, args.DiffPaths.ElementAt(index), args, stats)) + .GroupBy(result => result.id, StringComparer.InvariantCulture) + //.Where(group => group.Any(result => result.conclusion == EquivalenceTestConclusion.Slower)) + //.Where(group => !group.All(result => result.conclusion == EquivalenceTestConclusion.Same || result.conclusion == EquivalenceTestConclusion.Base)) // we are not interested in things that did not change + .Take(args.TopCount ?? int.MaxValue) + .OrderBy(group => group.Sum(result => Score(result.conclusion, result.baseEnv, result.baseResult, result.diffResult)))) + { + if (args.PrintStats) + { + stats.Print(); + } + + Console.WriteLine($"## {benchmarkResults.Key}"); + Console.WriteLine(); + + var data = benchmarkResults + .OrderBy(result => Importance(result.baseEnv)) + .Select(result => new + { + Conclusion = result.conclusion, + BaseMedian = result.baseResult.Statistics.Median, + DiffMedian = result.diffResult.Statistics.Median, + Ratio = result.baseResult.Statistics.Median / result.diffResult.Statistics.Median, + AllocatedDiff = GetAllocatedDiff(result.diffResult, result.baseResult), + Modality = Helper.GetModalInfo(result.baseResult) ?? 
Helper.GetModalInfo(result.diffResult), + OperatingSystem = Stats.GetSimplifiedOSName(result.baseEnv.OsVersion), + Architecture = result.baseEnv.Architecture, + ProcessorName = result.baseEnv.ProcessorName, + BaseRuntimeVersion = GetSimplifiedRuntimeVersion(result.baseEnv.RuntimeVersion), + DiffRuntimeVersion = GetSimplifiedRuntimeVersion(result.diffEnv.RuntimeVersion), + }) + .ToArray(); + + var table = data.ToMarkdownTable().WithHeaders("Result", "Base", "Diff", "Ratio", "Alloc Delta", "Modality", "Operating System", "Bit", "Processor Name", "Base V", "Diff V"); + + foreach (var line in table.ToMarkdown().Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries)) + Console.WriteLine($"| {line.TrimStart()}|"); // the table starts with \t and does not end with '|' and it looks bad so we fix it + + Console.WriteLine(); + } + } + + private static string GetAllocatedDiff(Benchmark diffResult, Benchmark baseResult) + { + long baseline = baseResult.Memory.BytesAllocatedPerOperation; + if (baseline == 0) + baseline = GetMetricValue(baseResult); + long diff = diffResult.Memory.BytesAllocatedPerOperation; + if (diff == 0) + diff = GetMetricValue(diffResult); + + return (diff - baseline).ToString("+0;-#"); + + static long GetMetricValue(Benchmark result) + { + if (result.Metrics == null) + return 0; + + double value = result.Metrics.Single(metric => metric.Descriptor.Id == "Allocated Memory").Value; + if (value < 1.0) + return 0; + + return (long)value; + } + } + + private static IEnumerable<(string id, Benchmark baseResult, Benchmark diffResult, EquivalenceTestConclusion conclusion, HostEnvironmentInfo baseEnv, HostEnvironmentInfo diffEnv)> GetResults( + string basePath, string diffPath, MultipleInputsOptions args, Stats stats) + { + foreach (var info in ReadResults(basePath, diffPath, args.Filters) + .Where(result => result.baseResult.Statistics != null && result.diffResult.Statistics != null)) // failures + { + if (info.baseEnv.Architecture != 
info.diffEnv.Architecture) + throw new InvalidOperationException("Use ResultsComparer to compare different Architectures"); + //if (info.baseEnv.OsVersion != info.diffEnv.OsVersion) + // throw new InvalidOperationException("Use ResultsComparer to compare different OS Versions"); + //if (info.baseEnv.ProcessorName != info.diffEnv.ProcessorName) + // throw new InvalidOperationException("Use ResultsComparer to compare different Processors"); + + var baseValues = info.baseResult.Statistics.OriginalValues; + var diffValues = info.diffResult.Statistics.OriginalValues; + + var userTresholdResult = StatisticalTestHelper.CalculateTost(MannWhitneyTest.Instance, baseValues, diffValues, args.StatisticalTestThreshold); + var noiseResult = StatisticalTestHelper.CalculateTost(MannWhitneyTest.Instance, baseValues, diffValues, args.NoiseThreshold); + + var conclusion = noiseResult.Conclusion == EquivalenceTestConclusion.Same // filter noise (0.20 ns vs 0.25ns etc) + ? noiseResult.Conclusion + : userTresholdResult.Conclusion; + + stats.Record(conclusion, info.baseEnv, info.baseResult); + + yield return (info.id, info.baseResult, info.diffResult, conclusion, info.baseEnv, info.diffEnv); + } + } + + private static IEnumerable<(string id, Benchmark baseResult, Benchmark diffResult, HostEnvironmentInfo baseEnv, HostEnvironmentInfo diffEnv)> + ReadResults(string basePath, string diffPath, IEnumerable filters) + { + var baseFiles = Helper.GetFilesToParse(basePath); + var diffFiles = Helper.GetFilesToParse(diffPath); + + if (!baseFiles.Any() || !diffFiles.Any()) + throw new ArgumentException($"Provided paths contained no {Helper.FullBdnJsonFileExtension} files."); + + var baseResults = baseFiles.Select(Helper.ReadFromFile); + var diffResults = diffFiles.Select(Helper.ReadFromFile); + + var benchmarkIdToDiffResults = new Dictionary(StringComparer.InvariantCulture); + + foreach (var diffResult in diffResults) + { + foreach (var diffBenchmark in diffResult.Benchmarks.Where(benchmarkResult => 
!filters.Any() || filters.Any(filter => filter.IsMatch(benchmarkResult.FullName)))) + { + benchmarkIdToDiffResults.Add(diffBenchmark.FullName, (diffBenchmark, diffResult.HostEnvironmentInfo)); + } + } + + foreach (var baseResult in baseResults) + { + foreach (var baseBenchmark in baseResult.Benchmarks.Where(result => benchmarkIdToDiffResults.ContainsKey(result.FullName))) + { + (Benchmark diffBenchmark, HostEnvironmentInfo diffEnv) = benchmarkIdToDiffResults[baseBenchmark.FullName]; + + yield return (baseBenchmark.FullName, baseBenchmark, diffBenchmark, baseResult.HostEnvironmentInfo, diffEnv); + } + } + } + + private static double Score(EquivalenceTestConclusion conclusion, HostEnvironmentInfo env, Benchmark baseResult, Benchmark diffResult) + { + switch (conclusion) + { + case EquivalenceTestConclusion.Base: + case EquivalenceTestConclusion.Same: + case EquivalenceTestConclusion.Unknown: + return 0; + case EquivalenceTestConclusion.Faster: + double improvementXtimes = baseResult.Statistics.Median / diffResult.Statistics.Median; + return (double.IsNaN(improvementXtimes) || double.IsInfinity(improvementXtimes)) + ? Importance(env) * 10.0 + : Importance(env) * Math.Min(improvementXtimes, 10.0); + case EquivalenceTestConclusion.Slower: + double regressionXtimes = diffResult.Statistics.Median / baseResult.Statistics.Median; + return (double.IsNaN(regressionXtimes) || double.IsInfinity(regressionXtimes)) + ? Importance(env) * -10.0 + : Importance(env) * Math.Min(regressionXtimes, 10.0) * -1.0; + default: + throw new NotSupportedException($"{conclusion} is not supported"); + } + } + + private static int Importance(HostEnvironmentInfo env) + { + // it's not any kind of official Microsoft priority, just the way I see them: + // 1. x64 Windows + // 2. x64 Linux + // 3. arm64 Linux + // 4. arm64 Windows + // 5. x86 Windows + // 6. arm Windows + // 7. 
x64 macOS + + if (env.Architecture == "X64" && env.OsVersion.StartsWith("Windows", StringComparison.OrdinalIgnoreCase)) + { + return 1; + } + else if (env.Architecture == "X64" && !env.OsVersion.StartsWith("macOS", StringComparison.OrdinalIgnoreCase)) + { + return 2; + } + else if (env.Architecture == "Arm64" && !env.OsVersion.StartsWith("Windows", StringComparison.OrdinalIgnoreCase)) + { + return 3; + } + else if (env.Architecture == "Arm64") + { + return 4; + } + else if (env.Architecture == "X86") + { + return 5; + } + else if (env.Architecture == "Arm") + { + return 6; + } + else + { + return 7; + } + } + + private static string GetSimplifiedRuntimeVersion(string text) + { + if (text.StartsWith(".NET Core 3", StringComparison.OrdinalIgnoreCase)) + { + // it's something like ".NET Core 3.1.6 (CoreCLR 4.700.20.26901, CoreFX 4.700.20.31603)" + // and what we care about is "3.1.6" + return text.Substring(".NET Core ".Length, "3.1.X".Length); + } + else + { + // it's something like ".NET 6.0.0 (6.0.21.35216)" + // and what we care about is "6.0.21.35216" + int index = text.IndexOf('('); + return text.Substring(index + 1, text.Length - index - 2); + } + } + } +} diff --git a/src/tools/ResultsComparer/MultipleInputsOptions.cs b/src/tools/ResultsComparer/MultipleInputsOptions.cs new file mode 100644 index 00000000000..f261c8d38af --- /dev/null +++ b/src/tools/ResultsComparer/MultipleInputsOptions.cs @@ -0,0 +1,20 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using Perfolizer.Mathematics.Thresholds; +using System.Text.RegularExpressions; + +namespace ResultsComparer +{ + public class MultipleInputsOptions + { + public string[] BasePaths { get; init; } + public string[] DiffPaths { get; init; } + public Threshold StatisticalTestThreshold { get; init; } + public Threshold NoiseThreshold { get; init; } + public int? TopCount { get; init; } + public Regex[] Filters { get; init; } + public bool PrintStats { get; init; } + } +} \ No newline at end of file diff --git a/src/tools/ResultsComparer/Program.cs b/src/tools/ResultsComparer/Program.cs index 66d56fe3a1a..b07a06c0570 100644 --- a/src/tools/ResultsComparer/Program.cs +++ b/src/tools/ResultsComparer/Program.cs @@ -2,288 +2,165 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; -using System.Collections.Generic; using System.Globalization; using System.IO; -using System.Linq; -using System.Text.RegularExpressions; using System.Threading; -using System.Xml; -using Perfolizer.Mathematics.Multimodality; -using Perfolizer.Mathematics.SignificanceTesting; +using System.CommandLine; +using System.CommandLine.Parsing; using Perfolizer.Mathematics.Thresholds; -using CommandLine; -using DataTransferContracts; -using MarkdownLog; -using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; namespace ResultsComparer { public class Program { - private const string FullBdnJsonFileExtension = "full.json"; - - public static void Main(string[] args) + public static int Main(string[] args) { // we print a lot of numbers here and we want to make it always in invariant way Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture; - Parser.Default.ParseArguments(args).WithParsed(Compare); - } - - private static void Compare(CommandLineOptions args) - { - if 
(!Threshold.TryParse(args.StatisticalTestThreshold, out var testThreshold)) - { - Console.WriteLine($"Invalid Threshold {args.StatisticalTestThreshold}. Examples: 5%, 10ms, 100ns, 1s."); - return; - } - if (!Threshold.TryParse(args.NoiseThreshold, out var noiseThreshold)) - { - Console.WriteLine($"Invalid Noise Threshold {args.NoiseThreshold}. Examples: 0.3ns 1ns."); - return; - } - - var notSame = GetNotSameResults(args, testThreshold, noiseThreshold).ToArray(); - - if (!notSame.Any()) + Option basePath = new Option( + new[] { "--base", "-b" }, "Path to the folder/file with base results."); + Option diffPath = new Option( + new[] { "--diff", "-d" }, "Path to the folder/file with diff results."); + Option threshold = new Option( + new[] { "--threshold", "-t" }, "Threshold for Statistical Test. Examples: 5%, 10ms, 100ns, 1s."); + Option noise = new Option( + new[] { "--noise", "-n" }, () => "0.3ns", "Noise threshold for Statistical Test. The difference for 1.0ns and 1.1ns is 10%, but it's just a noise. Examples: 0.5ns 1ns."); + Option top = new Option( + new[] { "--top" }, "Filter the diff to top/bottom N results. 
Optional."); + Option filters = new Option( + new[] { "--filter", "-f" }, "Filter the benchmarks by name using glob pattern(s)."); + Option fullId = new Option( + new[] { "--full-id" }, "Display the full benchmark name id."); + + threshold.IsRequired = true; + + RootCommand rootCommand = new RootCommand { - Console.WriteLine($"No differences found between the benchmark results with threshold {testThreshold}."); - return; - } - - PrintSummary(notSame); + basePath, diffPath, threshold, noise, top, filters, fullId + }; - PrintTable(notSame, EquivalenceTestConclusion.Slower, args); - PrintTable(notSame, EquivalenceTestConclusion.Faster, args); - - ExportToCsv(notSame, args.CsvPath); - ExportToXml(notSame, args.XmlPath); - } - - private static IEnumerable<(string id, Benchmark baseResult, Benchmark diffResult, EquivalenceTestConclusion conclusion)> GetNotSameResults(CommandLineOptions args, Threshold testThreshold, Threshold noiseThreshold) - { - foreach ((string id, Benchmark baseResult, Benchmark diffResult) in ReadResults(args) - .Where(result => result.baseResult.Statistics != null && result.diffResult.Statistics != null)) // failures + rootCommand.SetHandler( + static (basePath, diffPath, threshold, noise, top, filters, fullId) => + { + if (TryParseThresholds(threshold, noise, out var testThreshold, out var noiseThreshold)) + { + TwoInputsComparer.Compare(new TwoInputsOptions + { + BasePath = basePath, + DiffPath = diffPath, + StatisticalTestThreshold = testThreshold, + NoiseThreshold = noiseThreshold, + TopCount = top, + Filters = GetFilters(filters), + FullId = fullId + }); + } + }, + basePath, diffPath, threshold, noise, top, filters, fullId); + + Option input = new Option( + new[] { "--input", "-i" }, "Path to the Input folder with BenchmarkDotNet .json files."); + Option basePattern = new Option( + new[] { "--base" }, "Pattern used to search for base results in Input folder. 
Example: net7.0-preview2"); + Option diffPattern = new Option( + new[] { "--diff" }, "Pattern used to search for diff results in Input folder. Example: net7.0-preview3"); + Option printStats = new Option( + new[] { "--stats" }, () => true, "Prints summary per Architecture, Namespace and Operating System."); + + input.IsRequired = true; + basePattern.IsRequired = true; + diffPattern.IsRequired = true; + + Command matrixCommand = new Command("matrix", "Produces a matrix for all configurations found in given folder.") { - var baseValues = baseResult.Statistics.OriginalValues.ToArray(); - var diffValues = diffResult.Statistics.OriginalValues.ToArray(); + input, basePattern, diffPattern, threshold, noise, top, filters, printStats + }; - var userTresholdResult = StatisticalTestHelper.CalculateTost(MannWhitneyTest.Instance, baseValues, diffValues, testThreshold); - if (userTresholdResult.Conclusion == EquivalenceTestConclusion.Same) - continue; + rootCommand.AddCommand(matrixCommand); - var noiseResult = StatisticalTestHelper.CalculateTost(MannWhitneyTest.Instance, baseValues, diffValues, noiseThreshold); - if (noiseResult.Conclusion == EquivalenceTestConclusion.Same) - continue; - - yield return (id, baseResult, diffResult, userTresholdResult.Conclusion); - } + matrixCommand.SetHandler( + static (input, basePattern, diffPattern, threshold, noise, top, filters, printStats) => + { + if (TryParseThresholds(threshold, noise, out var testThreshold, out var noiseThreshold) + && TryGetPaths(input, basePattern, diffPattern, out var basePaths, out var diffPaths)) + { + MultipleInputsComparer.Compare(new MultipleInputsOptions + { + BasePaths = basePaths.ToArray(), + DiffPaths = diffPaths.ToArray(), + StatisticalTestThreshold = testThreshold, + NoiseThreshold = noiseThreshold, + TopCount = top, + Filters = GetFilters(filters), + PrintStats = printStats + }); + } + }, input, basePattern, diffPattern, threshold, noise, top, filters, printStats); + + return rootCommand.Invoke(args); } 
- private static void PrintSummary((string id, Benchmark baseResult, Benchmark diffResult, EquivalenceTestConclusion conclusion)[] notSame) + private static bool TryParseThresholds(string test, string noise, out Threshold testThreshold, out Threshold noiseThreshold) { - var better = notSame.Where(result => result.conclusion == EquivalenceTestConclusion.Faster); - var worse = notSame.Where(result => result.conclusion == EquivalenceTestConclusion.Slower); - var betterCount = better.Count(); - var worseCount = worse.Count(); - - // If the baseline doesn't have the same set of tests, you wind up with Infinity in the list of diffs. - // Exclude them for purposes of geomean. - worse = worse.Where(x => GetRatio(x) != double.PositiveInfinity); - better = better.Where(x => GetRatio(x) != double.PositiveInfinity); - - Console.WriteLine("summary:"); - - if (betterCount > 0) + if (!Threshold.TryParse(test, out testThreshold)) { - var betterGeoMean = Math.Pow(10, better.Skip(1).Aggregate(Math.Log10(GetRatio(better.First())), (x, y) => x + Math.Log10(GetRatio(y))) / better.Count()); - Console.WriteLine($"better: {betterCount}, geomean: {betterGeoMean:F3}"); + Console.WriteLine($"Invalid Threshold '{test}'. Examples: 5%, 10ms, 100ns, 1s."); + noiseThreshold = null; + return false; } - - if (worseCount > 0) + if (!Threshold.TryParse(noise, out noiseThreshold)) { - var worseGeoMean = Math.Pow(10, worse.Skip(1).Aggregate(Math.Log10(GetRatio(worse.First())), (x, y) => x + Math.Log10(GetRatio(y))) / worse.Count()); - Console.WriteLine($"worse: {worseCount}, geomean: {worseGeoMean:F3}"); + Console.WriteLine($"Invalid Noise Threshold '{noise}'. 
Examples: 0.3ns 1ns."); + return false; } - Console.WriteLine($"total diff: {notSame.Count()}"); - Console.WriteLine(); + return true; } - private static void PrintTable((string id, Benchmark baseResult, Benchmark diffResult, EquivalenceTestConclusion conclusion)[] notSame, EquivalenceTestConclusion conclusion, CommandLineOptions args) + private static bool TryGetPaths(DirectoryInfo input, string basePattern, string diffPattern, out List basePaths, out List diffPaths) { - var data = notSame - .Where(result => result.conclusion == conclusion) - .OrderByDescending(result => GetRatio(conclusion, result.baseResult, result.diffResult)) - .Take(args.TopCount ?? int.MaxValue) - .Select(result => new - { - Id = (result.id.Length <= 80 || args.FullId) ? result.id : result.id.Substring(0, 80), - DisplayValue = GetRatio(conclusion, result.baseResult, result.diffResult), - BaseMedian = result.baseResult.Statistics.Median, - DiffMedian = result.diffResult.Statistics.Median, - Modality = GetModalInfo(result.baseResult) ?? GetModalInfo(result.diffResult) - }) - .ToArray(); + basePaths = diffPaths = null; - if (!data.Any()) + if (!input.Exists) { - Console.WriteLine($"No {conclusion} results for the provided threshold = {args.StatisticalTestThreshold} and noise filter = {args.NoiseThreshold}."); - Console.WriteLine(); - return; + Console.WriteLine($"Provided Input folder '{input.FullName}' does NOT exist."); + return false; } - var table = data.ToMarkdownTable().WithHeaders(conclusion.ToString(), conclusion == EquivalenceTestConclusion.Faster ? 
"base/diff" : "diff/base", "Base Median (ns)", "Diff Median (ns)", "Modality"); - - foreach (var line in table.ToMarkdown().Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries)) - Console.WriteLine($"| {line.TrimStart()}|"); // the table starts with \t and does not end with '|' and it looks bad so we fix it - - Console.WriteLine(); - } - - private static IEnumerable<(string id, Benchmark baseResult, Benchmark diffResult)> ReadResults(CommandLineOptions args) - { - var baseFiles = GetFilesToParse(args.BasePath); - var diffFiles = GetFilesToParse(args.DiffPath); - - if (!baseFiles.Any() || !diffFiles.Any()) - throw new ArgumentException($"Provided paths contained no {FullBdnJsonFileExtension} files."); + basePaths = new List(); + diffPaths = new List(); - var baseResults = baseFiles.Select(ReadFromFile); - var diffResults = diffFiles.Select(ReadFromFile); - - var filters = args.Filters.Select(pattern => new Regex(WildcardToRegex(pattern), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)).ToArray(); - - var benchmarkIdToDiffResults = diffResults - .SelectMany(result => result.Benchmarks) - .Where(benchmarkResult => !filters.Any() || filters.Any(filter => filter.IsMatch(benchmarkResult.FullName))) - .ToDictionary(benchmarkResult => benchmarkResult.FullName, benchmarkResult => benchmarkResult); - - return baseResults - .SelectMany(result => result.Benchmarks) - .ToDictionary(benchmarkResult => benchmarkResult.FullName, benchmarkResult => benchmarkResult) // we use ToDictionary to make sure the results have unique IDs - .Where(baseResult => benchmarkIdToDiffResults.ContainsKey(baseResult.Key)) - .Select(baseResult => (baseResult.Key, baseResult.Value, benchmarkIdToDiffResults[baseResult.Key])); - } - - private static void ExportToCsv((string id, Benchmark baseResult, Benchmark diffResult, EquivalenceTestConclusion conclusion)[] notSame, FileInfo csvPath) - { - if (csvPath == null) - return; - - if (csvPath.Exists) - csvPath.Delete(); - - using (var 
textWriter = csvPath.CreateText()) - { - foreach (var (id, baseResult, diffResult, conclusion) in notSame) - { - textWriter.WriteLine($"\"{id.Replace("\"", "\"\"")}\";base;{conclusion};{string.Join(';', baseResult.Statistics.OriginalValues)}"); - textWriter.WriteLine($"\"{id.Replace("\"", "\"\"")}\";diff;{conclusion};{string.Join(';', diffResult.Statistics.OriginalValues)}"); - } - } - - Console.WriteLine($"CSV results exported to {csvPath.FullName}"); - } - - private static void ExportToXml((string id, Benchmark baseResult, Benchmark diffResult, EquivalenceTestConclusion conclusion)[] notSame, FileInfo xmlPath) - { - if (xmlPath == null) - { - Console.WriteLine("No file given"); - return; - } - - if (xmlPath.Exists) - xmlPath.Delete(); - - using (XmlWriter writer = XmlWriter.Create(xmlPath.Open(FileMode.OpenOrCreate, FileAccess.Write, FileShare.Write))) + foreach (var baseline in input.GetDirectories($"*{basePattern}*")) { - writer.WriteStartElement("performance-tests"); - foreach (var (id, baseResult, diffResult, conclusion) in notSame.Where(x => x.conclusion == EquivalenceTestConclusion.Slower)) + var current = baseline.FullName.Replace(basePattern, diffPattern); + if (Directory.Exists(current)) { - writer.WriteStartElement("test"); - writer.WriteAttributeString("name", id); - writer.WriteAttributeString("type", baseResult.Type); - writer.WriteAttributeString("method", baseResult.Method); - writer.WriteAttributeString("time", "0"); - writer.WriteAttributeString("result", "Fail"); - writer.WriteStartElement("failure"); - writer.WriteAttributeString("exception-type", "Regression"); - writer.WriteElementString("message", $"{id} has regressed, was {baseResult.Statistics.Median} is {diffResult.Statistics.Median}."); - writer.WriteEndElement(); + basePaths.Add(baseline.FullName); + diffPaths.Add(current); } - - foreach (var (id, baseResult, diffResult, conclusion) in notSame.Where(x => x.conclusion == EquivalenceTestConclusion.Faster)) + else { - 
writer.WriteStartElement("test"); - writer.WriteAttributeString("name", id); - writer.WriteAttributeString("type", baseResult.Type); - writer.WriteAttributeString("method", baseResult.Method); - writer.WriteAttributeString("time", "0"); - writer.WriteAttributeString("result", "Skip"); - writer.WriteElementString("reason", $"{id} has improved, was {baseResult.Statistics.Median} is {diffResult.Statistics.Median}."); - writer.WriteEndElement(); + Console.WriteLine($"Base results folder '{baseline.FullName}' has no corresponding diff results folder ('{current}')."); } - - writer.WriteEndElement(); - writer.Flush(); } - Console.WriteLine($"XML results exported to {xmlPath.FullName}"); - } - - private static string[] GetFilesToParse(string path) - { - if (Directory.Exists(path)) - return Directory.GetFiles(path, $"*{FullBdnJsonFileExtension}", SearchOption.AllDirectories); - else if (File.Exists(path) || !path.EndsWith(FullBdnJsonFileExtension)) - return new[] { path }; - else - throw new FileNotFoundException($"Provided path does NOT exist or is not a {path} file", path); - } - - // code and magic values taken from BenchmarkDotNet.Analysers.MultimodalDistributionAnalyzer - // See http://www.brendangregg.com/FrequencyTrails/modes.html - private static string GetModalInfo(Benchmark benchmark) - { - if (benchmark.Statistics.N < 12) // not enough data to tell - return null; - - double mValue = MValueCalculator.Calculate(benchmark.Statistics.OriginalValues); - if (mValue > 4.2) - return "multimodal"; - else if (mValue > 3.2) - return "bimodal"; - else if (mValue > 2.8) - return "several?"; - - return null; - } - - private static double GetRatio((string id, Benchmark baseResult, Benchmark diffResult, EquivalenceTestConclusion conclusion) item) => GetRatio(item.conclusion, item.baseResult, item.diffResult); - - private static double GetRatio(EquivalenceTestConclusion conclusion, Benchmark baseResult, Benchmark diffResult) - => conclusion == EquivalenceTestConclusion.Faster - ? 
baseResult.Statistics.Median / diffResult.Statistics.Median - : diffResult.Statistics.Median / baseResult.Statistics.Median; - - private static BdnResult ReadFromFile(string resultFilePath) - { - try + if (!basePaths.Any()) { - return JsonConvert.DeserializeObject(File.ReadAllText(resultFilePath)); + Console.WriteLine($"Provided Input folder '{input.FullName}' does contain any subfolders that match the base pattern ('{basePattern}')."); + return false; } - catch (JsonSerializationException) - { - Console.WriteLine($"Exception while reading the {resultFilePath} file."); - throw; - } + return true; } + private static Regex[] GetFilters(string[] filters) + => filters.Select(pattern => new Regex(WildcardToRegex(pattern), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)).ToArray(); + // https://stackoverflow.com/a/6907849/5852046 not perfect but should work for all we need private static string WildcardToRegex(string pattern) => $"^{Regex.Escape(pattern).Replace(@"\*", ".*").Replace(@"\?", ".")}$"; } diff --git a/src/tools/ResultsComparer/README.md b/src/tools/ResultsComparer/README.md index 9cc2aefa9c0..4b78528bb5f 100644 --- a/src/tools/ResultsComparer/README.md +++ b/src/tools/ResultsComparer/README.md @@ -42,3 +42,46 @@ dotnet run --base "C:\results\windows" --diff "C:\results\ubuntu" --threshold 1% | System.Tests.Perf_Array.ArrayCopy3D | 1.31 | 372.71 | 284.73 | | If there is no difference or if there is no match (we use full benchmark names to match the benchmarks), then the results are omitted. + +## Matrix + +The tool also supports comparing multiple result sets. 
For up-to-date help please run `dotnet run -- matrix --help` + +Sample usage: + +```cmd +dotnet run -c Release matrix --input D:\results\p3_all\ --base net7.0-preview2 --diff net7.0-preview3 --threshold 10% --noise 2ns --filter System.IO* +``` + +Sample results: + +## System.IO.Tests.Perf_File.WriteAllText(size: 10000) + +| Result | Base | Diff | Ratio | Alloc Delta | Modality | Operating System | Bit | Processor Name | Base V | Diff V | +| ------ | ----------:| ----------:| -----:| -----------:| -------- | --------------------- | ----- | ----------------------------------------------- | ------------ | ------------ | +| Same | 939321.02 | 1031195.70 | 0.91 | +0 | several? | Windows 10 | X64 | Intel Xeon CPU E5-1650 v4 3.60GHz | 7.0.22.12204 | 7.0.22.17504| +| Faster | 1059005.27 | 598518.92 | 1.77 | +0 | bimodal | Windows 11 | X64 | AMD Ryzen Threadripper PRO 3945WX 12-Cores | 7.0.22.12204 | 7.0.22.17504| +| Faster | 937008.80 | 551313.28 | 1.70 | +0 | several? | Windows 11 | X64 | AMD Ryzen 9 5900X | 7.0.22.12204 | 7.0.22.17504| +| Faster | 4346259.38 | 3206257.03 | 1.36 | +0 | several? 
| Windows 11 | X64 | Intel Core i5-4300U CPU 1.90GHz (Haswell) | 7.0.22.12204 | 7.0.22.17504| +| Faster | 2573217.71 | 832166.18 | 3.09 | -6 | | Windows 11 | X64 | Unknown processor | 7.0.22.12204 | 7.0.22.17504| +| Same | 235188.35 | 217942.50 | 1.08 | +0 | | Windows 11 | X64 | Intel Core i7-8700 CPU 3.20GHz (Coffee Lake) | 7.0.22.12204 | 7.0.22.17504| +| Same | 824210.94 | 749032.29 | 1.10 | +1 | | Windows 11 | X64 | Intel Core i9-9900T CPU 2.10GHz | 7.0.22.12204 | 7.0.22.17504| +| Same | 50128.53 | 50988.47 | 0.98 | +0 | | alpine 3.13 | X64 | Intel Core i7-7700 CPU 3.60GHz (Kaby Lake) | 7.0.22.12204 | 7.0.22.17504| +| Same | 79680.16 | 78657.24 | 1.01 | +0 | | centos 7 | X64 | Intel Xeon CPU E5530 2.40GHz | 7.0.22.12204 | 7.0.22.17504| +| Same | 48132.14 | 48840.28 | 0.99 | +0 | | debian 11 | X64 | Intel Core i7-7700 CPU 3.60GHz (Kaby Lake) | 7.0.22.12204 | 7.0.22.17504| +| Same | 42636.21 | 44366.44 | 0.96 | +0 | several? | pop 20.04 | X64 | Intel Core i7-6600U CPU 2.60GHz (Skylake) | 7.0.22.12204 | 7.0.22.17504| +| Same | 32762.42 | 32443.19 | 1.01 | +0 | bimodal | ubuntu 18.04 | X64 | Intel Xeon CPU E5-1650 v4 3.60GHz | 7.0.22.12204 | 7.0.22.17504| +| Faster | 64744.24 | 55839.56 | 1.16 | +0 | bimodal | ubuntu 18.04 | X64 | Intel Core i7-2720QM CPU 2.20GHz (Sandy Bridge) | 7.0.22.12204 | 7.0.22.17504| +| Same | 3684335.97 | 3726101.03 | 0.99 | +0 | | alpine 3.12 | Arm64 | Unknown processor | 7.0.22.12204 | 7.0.22.17504| +| Same | 60851.89 | 57414.92 | 1.06 | +0 | | debian 11 | Arm64 | Unknown processor | 7.0.22.12204 | 7.0.22.17504| +| Same | 84304.48 | 83274.12 | 1.01 | +0 | | ubuntu 18.04 | Arm64 | Unknown processor | 7.0.22.12204 | 7.0.22.17504| +| Faster | 2489377.68 | 515978.13 | 4.82 | -5 | | Windows 10 | Arm64 | Microsoft SQ1 3.0 GHz | 7.0.22.12204 | 7.0.22.17504| +| Faster | 2675980.21 | 939078.31 | 2.85 | -5 | | Windows 11 | Arm64 | Microsoft SQ1 3.0 GHz | 7.0.22.12204 | 7.0.22.17504| +| Faster | 1158829.33 | 469372.13 | 2.47 | -1 | | Windows 10 | 
X86 | Intel Xeon CPU E5-1650 v4 3.60GHz | 7.0.22.12204 | 7.0.22.17504| +| Faster | 929645.42 | 507981.70 | 1.83 | -2 | bimodal | Windows 11 | X86 | AMD Ryzen Threadripper PRO 3945WX 12-Cores | 7.0.22.12204 | 7.0.22.17504| +| Faster | 3215358.93 | 440157.77 | 7.31 | -6 | | Windows 11 | X86 | Intel Core i7-10510U CPU 1.80GHz | 7.0.22.12204 | 7.0.22.17504| +| Same | 126829.97 | 121465.99 | 1.04 | +0 | | Windows 7 SP1 | X86 | Intel Core i7-7700 CPU 3.60GHz (Kaby Lake) | 7.0.22.12204 | 7.0.22.17504| +| Same | 218819.23 | 214187.24 | 1.02 | -1 | bimodal | ubuntu 18.04 | Arm | ARMv7 Processor rev 3 (v7l) | 7.0.22.12204 | 7.0.22.17504| +| Faster | 2478265.18 | 547273.17 | 4.53 | -5 | | Windows 10 | Arm | Microsoft SQ1 3.0 GHz | 7.0.22.12204 | 7.0.22.17504| +| Same | 161909.04 | 158812.34 | 1.02 | +0 | | macOS Monterey 12.2.1 | X64 | Intel Core i7-5557U CPU 3.10GHz (Broadwell) | 7.0.22.12204 | 7.0.22.17504| +| Same | 121620.87 | 122424.61 | 0.99 | +0 | | macOS Monterey 12.3.1 | X64 | Intel Core i7-4870HQ CPU 2.50GHz (Haswell) | 7.0.22.12204 | 7.0.22.17504| diff --git a/src/tools/ResultsComparer/ResultsComparer.csproj b/src/tools/ResultsComparer/ResultsComparer.csproj index 9934bd0df21..2c82d2aeaf3 100644 --- a/src/tools/ResultsComparer/ResultsComparer.csproj +++ b/src/tools/ResultsComparer/ResultsComparer.csproj @@ -2,14 +2,14 @@ Exe $(PERFLAB_TARGET_FRAMEWORKS) - net5.0 + net6.0 latest - + diff --git a/src/tools/ResultsComparer/Stats.cs b/src/tools/ResultsComparer/Stats.cs new file mode 100644 index 00000000000..9cf1bc42443 --- /dev/null +++ b/src/tools/ResultsComparer/Stats.cs @@ -0,0 +1,121 @@ +using DataTransferContracts; +using MarkdownLog; +using Perfolizer.Mathematics.SignificanceTesting; +using System; +using System.Collections.Generic; +using System.Linq; + +namespace ResultsComparer +{ + internal class Stats + { + private readonly Dictionary perArchitecture = new(); + private readonly Dictionary perNamespace = new(); + private readonly Dictionary perOS = new(); + 
private readonly PerConclusion totals = new(); + private bool printed = false; + + internal void Record(EquivalenceTestConclusion conclusion, HostEnvironmentInfo envInfo, Benchmark benchmark) + { + totals.Update(conclusion); + + Record(perArchitecture, envInfo.Architecture, conclusion); + Record(perOS, GetSimplifiedOSName(envInfo.OsVersion), conclusion); + + if (!string.IsNullOrEmpty(benchmark.Namespace)) // some benchmarks have no namespace ;) + { + Record(perNamespace, benchmark.Namespace, conclusion); + } + + static void Record(Dictionary dictionary, string key, EquivalenceTestConclusion conclusion) + { + if (!dictionary.TryGetValue(key, out var stats)) + { + dictionary[key] = stats = new PerConclusion(); + } + stats.Update(conclusion); + } + } + + internal void Print() + { + if (printed) + { + return; // print them only once + } + printed = true; + + totals.Print(); + + Print(perArchitecture, "Architecture"); + Print(perOS, "Operating System"); + Print(perNamespace, "Namespace"); + + static void Print(Dictionary dictionary, string name) + { + Console.WriteLine($"## Statistics per {name}"); + Console.WriteLine(); + + var data = dictionary.Select(pair => new + { + Key = pair.Key, + Same = ((double)pair.Value.Same / pair.Value.Total).ToString("P2"), + Slower = ((double)pair.Value.Slower / pair.Value.Total).ToString("P2"), + Faster = ((double)pair.Value.Faster / pair.Value.Total).ToString("P2"), + Unknown = ((double)pair.Value.Unknown / pair.Value.Total).ToString("P2"), + }) + .ToArray(); + + var table = data.ToMarkdownTable().WithHeaders(name, "Same", "Slower", "Faster", "Unknown"); + + foreach (var line in table.ToMarkdown().Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries)) + Console.WriteLine($"| {line.TrimStart()}|"); // the table starts with \t and does not end with '|' and it looks bad so we fix it + + Console.WriteLine(); + } + } + + internal static string GetSimplifiedOSName(string text) => text.Split('(')[0]; + + private class 
PerConclusion + { + internal long Total, Faster, Slower, Same, Unknown; + + internal void Update(EquivalenceTestConclusion conclusion) + { + Total++; + + switch (conclusion) + { + case EquivalenceTestConclusion.Base: + case EquivalenceTestConclusion.Same: + Same++; + break; + case EquivalenceTestConclusion.Faster: + Faster++; + break; + case EquivalenceTestConclusion.Slower: + Slower++; + break; + case EquivalenceTestConclusion.Unknown: + Unknown++; + break; + default: + throw new NotSupportedException($"Invalid conclusion! {conclusion}"); + } + } + + internal void Print() + { + Console.WriteLine("## Statistics"); + Console.WriteLine(); + Console.WriteLine($"Total: {Total}"); + Console.WriteLine($"Same: {(double)Same / Total:P2}"); + Console.WriteLine($"Slower: {(double)Slower / Total:P2}"); + Console.WriteLine($"Faster: {(double)Faster / Total:P2}"); + Console.WriteLine($"Unknown: {(double)Unknown / Total:P2}"); + Console.WriteLine(); + } + } + } +} diff --git a/src/tools/ResultsComparer/TwoInputsComparer.cs b/src/tools/ResultsComparer/TwoInputsComparer.cs new file mode 100644 index 00000000000..89209aefb58 --- /dev/null +++ b/src/tools/ResultsComparer/TwoInputsComparer.cs @@ -0,0 +1,141 @@ +using DataTransferContracts; +using MarkdownLog; +using Perfolizer.Mathematics.SignificanceTesting; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text.RegularExpressions; + +namespace ResultsComparer +{ + internal static class TwoInputsComparer + { + internal static void Compare(TwoInputsOptions args) + { + var notSame = GetNotSameResults(args).ToArray(); + + if (!notSame.Any()) + { + Console.WriteLine($"No differences found between the benchmark results with threshold {args.StatisticalTestThreshold}."); + return; + } + + PrintSummary(notSame); + + PrintTable(notSame, EquivalenceTestConclusion.Slower, args); + PrintTable(notSame, EquivalenceTestConclusion.Faster, args); + } + + private static IEnumerable<(string id, 
Benchmark baseResult, Benchmark diffResult, EquivalenceTestConclusion conclusion)> GetNotSameResults(TwoInputsOptions args) + { + foreach ((string id, Benchmark baseResult, Benchmark diffResult) in ReadResults(args) + .Where(result => result.baseResult.Statistics != null && result.diffResult.Statistics != null)) // failures + { + var baseValues = baseResult.Statistics.OriginalValues.ToArray(); + var diffValues = diffResult.Statistics.OriginalValues.ToArray(); + + var userTresholdResult = StatisticalTestHelper.CalculateTost(MannWhitneyTest.Instance, baseValues, diffValues, args.StatisticalTestThreshold); + if (userTresholdResult.Conclusion == EquivalenceTestConclusion.Same) + continue; + + var noiseResult = StatisticalTestHelper.CalculateTost(MannWhitneyTest.Instance, baseValues, diffValues, args.NoiseThreshold); + if (noiseResult.Conclusion == EquivalenceTestConclusion.Same) + continue; + + yield return (id, baseResult, diffResult, userTresholdResult.Conclusion); + } + } + + private static void PrintSummary((string id, Benchmark baseResult, Benchmark diffResult, EquivalenceTestConclusion conclusion)[] notSame) + { + var better = notSame.Where(result => result.conclusion == EquivalenceTestConclusion.Faster); + var worse = notSame.Where(result => result.conclusion == EquivalenceTestConclusion.Slower); + var betterCount = better.Count(); + var worseCount = worse.Count(); + + // If the baseline doesn't have the same set of tests, you wind up with Infinity in the list of diffs. + // Exclude them for purposes of geomean. 
+ worse = worse.Where(x => GetRatio(x) != double.PositiveInfinity); + better = better.Where(x => GetRatio(x) != double.PositiveInfinity); + + Console.WriteLine("summary:"); + + if (betterCount > 0) + { + var betterGeoMean = Math.Pow(10, better.Skip(1).Aggregate(Math.Log10(GetRatio(better.First())), (x, y) => x + Math.Log10(GetRatio(y))) / better.Count()); + Console.WriteLine($"better: {betterCount}, geomean: {betterGeoMean:F3}"); + } + + if (worseCount > 0) + { + var worseGeoMean = Math.Pow(10, worse.Skip(1).Aggregate(Math.Log10(GetRatio(worse.First())), (x, y) => x + Math.Log10(GetRatio(y))) / worse.Count()); + Console.WriteLine($"worse: {worseCount}, geomean: {worseGeoMean:F3}"); + } + + Console.WriteLine($"total diff: {notSame.Count()}"); + Console.WriteLine(); + } + + private static void PrintTable((string id, Benchmark baseResult, Benchmark diffResult, EquivalenceTestConclusion conclusion)[] notSame, EquivalenceTestConclusion conclusion, TwoInputsOptions args) + { + var data = notSame + .Where(result => result.conclusion == conclusion) + .OrderByDescending(result => GetRatio(conclusion, result.baseResult, result.diffResult)) + .Take(args.TopCount ?? int.MaxValue) + .Select(result => new + { + Id = (result.id.Length <= 80 || args.FullId) ? result.id : result.id.Substring(0, 80), + DisplayValue = GetRatio(conclusion, result.baseResult, result.diffResult), + BaseMedian = result.baseResult.Statistics.Median, + DiffMedian = result.diffResult.Statistics.Median, + Modality = Helper.GetModalInfo(result.baseResult) ?? Helper.GetModalInfo(result.diffResult) + }) + .ToArray(); + + if (!data.Any()) + { + Console.WriteLine($"No {conclusion} results for the provided threshold = {args.StatisticalTestThreshold} and noise filter = {args.NoiseThreshold}."); + Console.WriteLine(); + return; + } + + var table = data.ToMarkdownTable().WithHeaders(conclusion.ToString(), conclusion == EquivalenceTestConclusion.Faster ? 
"base/diff" : "diff/base", "Base Median (ns)", "Diff Median (ns)", "Modality"); + + foreach (var line in table.ToMarkdown().Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries)) + Console.WriteLine($"| {line.TrimStart()}|"); // the table starts with \t and does not end with '|' and it looks bad so we fix it + + Console.WriteLine(); + } + + private static IEnumerable<(string id, Benchmark baseResult, Benchmark diffResult)> ReadResults(TwoInputsOptions args) + { + var baseFiles = Helper.GetFilesToParse(args.BasePath); + var diffFiles = Helper.GetFilesToParse(args.DiffPath); + + if (!baseFiles.Any() || !diffFiles.Any()) + throw new ArgumentException($"Provided paths contained no {Helper.FullBdnJsonFileExtension} files."); + + var baseResults = baseFiles.Select(Helper.ReadFromFile); + var diffResults = diffFiles.Select(Helper.ReadFromFile); + + var benchmarkIdToDiffResults = diffResults + .SelectMany(result => result.Benchmarks) + .Where(benchmarkResult => !args.Filters.Any() || args.Filters.Any(filter => filter.IsMatch(benchmarkResult.FullName))) + .ToDictionary(benchmarkResult => benchmarkResult.FullName, benchmarkResult => benchmarkResult); + + return baseResults + .SelectMany(result => result.Benchmarks) + .ToDictionary(benchmarkResult => benchmarkResult.FullName, benchmarkResult => benchmarkResult) // we use ToDictionary to make sure the results have unique IDs + .Where(baseResult => benchmarkIdToDiffResults.ContainsKey(baseResult.Key)) + .Select(baseResult => (baseResult.Key, baseResult.Value, benchmarkIdToDiffResults[baseResult.Key])); + } + + private static double GetRatio((string id, Benchmark baseResult, Benchmark diffResult, EquivalenceTestConclusion conclusion) item) => GetRatio(item.conclusion, item.baseResult, item.diffResult); + + private static double GetRatio(EquivalenceTestConclusion conclusion, Benchmark baseResult, Benchmark diffResult) + => conclusion == EquivalenceTestConclusion.Faster + ? 
baseResult.Statistics.Median / diffResult.Statistics.Median + : diffResult.Statistics.Median / baseResult.Statistics.Median; + } +} diff --git a/src/tools/ResultsComparer/TwoInputsOptions.cs b/src/tools/ResultsComparer/TwoInputsOptions.cs new file mode 100644 index 00000000000..9048b1007b5 --- /dev/null +++ b/src/tools/ResultsComparer/TwoInputsOptions.cs @@ -0,0 +1,20 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Perfolizer.Mathematics.Thresholds; +using System.Text.RegularExpressions; + +namespace ResultsComparer +{ + public class TwoInputsOptions + { + public string BasePath { get; init; } + public string DiffPath { get; init; } + public Threshold StatisticalTestThreshold { get; init; } + public Threshold NoiseThreshold { get; init; } + public int? TopCount { get; init; } + public Regex[] Filters { get; init; } + public bool FullId { get; init; } + } +} \ No newline at end of file