diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 33c27755a95409..53cf7f2d21c2aa 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -1954,7 +1954,7 @@ def aggregate_diff_metrics(details_file): # Project out these fields for the saved diffs, to use for further # processing. Saving everything into memory is costly on memory when there # are a large number of diffs. - diffs_fields = ["Context", "Context size", "Base ActualCodeBytes", "Diff ActualCodeBytes", "Base PerfScore", "Diff PerfScore"] + diffs_fields = ["Context", "Method full name", "Context size", "Base ActualCodeBytes", "Diff ActualCodeBytes", "Base PerfScore", "Diff PerfScore"] diffs = [] for row in read_csv(details_file): @@ -2725,12 +2725,40 @@ def diff_pct(r): display_subset("Smallest {} zero sized diffs:", smallest_zero_size_contexts) - by_diff_size_pct_examples = [diff for diff in by_diff_size_pct if abs(int(diff['Diff ActualCodeBytes']) - int(diff['Base ActualCodeBytes'])) < 50] - if len(by_diff_size_pct_examples) == 0: - by_diff_size_pct_examples = by_diff_size_pct + # Prefer to show small diffs over large percentage wise diffs; sort by this additionally. + # sorted is stable, so for multiple small diffs this will keep them in order of percentage wise improvement/regression. + def is_small_diff(row): + if abs(int(row['Diff ActualCodeBytes']) - int(row['Base ActualCodeBytes'])) < 50: + return 0 + + return 1 + + by_small_then_improvement = sorted(by_diff_size_pct, key=is_small_diff) + by_small_then_regression = sorted(reversed(by_diff_size_pct), key=is_small_diff) + + def pick_examples(diffs, picked_contexts): + seen = set() + def try_add_seen(row): + len_before = len(seen) + seen.add(row["Method full name"]) + return len_before < len(seen) + + result = [] + for row in diffs: + if row["Context"] in picked_contexts or not try_add_seen(row): + continue + + result.append(row) + if len(result) >= 3: + break + + return result + + example_improvements = pick_examples(by_small_then_improvement, set()) + + example_improvement_contexts = set(row["Context"] for row in example_improvements) + example_regressions = pick_examples(by_small_then_regression, example_improvement_contexts) - example_improvements = by_diff_size_pct_examples[:3] - example_regressions = by_diff_size_pct_examples[3:][-3:] contexts = smallest_contexts + top_improvements + top_regressions + top_improvements_pct + top_regressions_pct + smallest_zero_size_contexts + example_improvements + example_regressions examples = example_improvements + example_regressions