Skip to content

Commit

Permalink
SPMI: Avoid duplicate example diffs in diffs summary (dotnet#110619)
Browse files Browse the repository at this point in the history
This deduplicates the examples that can be picked by their method name;
this means that the examples no longer should show multiple example
diffs with the same name.
  • Loading branch information
jakobbotsch authored Dec 12, 2024
1 parent 2df2fea commit 3eeea54
Showing 1 changed file with 34 additions and 6 deletions.
40 changes: 34 additions & 6 deletions src/coreclr/scripts/superpmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1954,7 +1954,7 @@ def aggregate_diff_metrics(details_file):
# Project out these fields for the saved diffs, to use for further
# processing. Saving everything into memory is costly on memory when there
# are a large number of diffs.
diffs_fields = ["Context", "Context size", "Base ActualCodeBytes", "Diff ActualCodeBytes", "Base PerfScore", "Diff PerfScore"]
diffs_fields = ["Context", "Method full name", "Context size", "Base ActualCodeBytes", "Diff ActualCodeBytes", "Base PerfScore", "Diff PerfScore"]
diffs = []

for row in read_csv(details_file):
Expand Down Expand Up @@ -2725,12 +2725,40 @@ def diff_pct(r):

display_subset("Smallest {} zero sized diffs:", smallest_zero_size_contexts)

by_diff_size_pct_examples = [diff for diff in by_diff_size_pct if abs(int(diff['Diff ActualCodeBytes']) - int(diff['Base ActualCodeBytes'])) < 50]
if len(by_diff_size_pct_examples) == 0:
by_diff_size_pct_examples = by_diff_size_pct
# Prefer to show small diffs over large percentage wise diffs; sort by this additionally.
# sorted is stable, so for multiple small diffs this will keep them in order of percentage wise improvement/regression.
def is_small_diff(row):
if abs(int(row['Diff ActualCodeBytes']) - int(row['Base ActualCodeBytes'])) < 50:
return 0

return 1

by_small_then_improvement = sorted(by_diff_size_pct, key=is_small_diff)
by_small_then_regression = sorted(reversed(by_diff_size_pct), key=is_small_diff)

def pick_examples(diffs, picked_contexts):
seen = set()
def try_add_seen(row):
len_before = len(seen)
seen.add(row["Method full name"])
return len_before < len(seen)

result = []
for row in diffs:
if row["Context"] in picked_contexts or not try_add_seen(row):
continue

result.append(row)
if len(result) >= 3:
break

return result

example_improvements = pick_examples(by_small_then_improvement, set())

example_improvement_contexts = set(row["Context"] for row in example_improvements)
example_regressions = pick_examples(by_small_then_regression, example_improvement_contexts)

example_improvements = by_diff_size_pct_examples[:3]
example_regressions = by_diff_size_pct_examples[3:][-3:]
contexts = smallest_contexts + top_improvements + top_regressions + top_improvements_pct + top_regressions_pct + smallest_zero_size_contexts + example_improvements + example_regressions
examples = example_improvements + example_regressions

Expand Down

0 comments on commit 3eeea54

Please sign in to comment.