Skip to content

Commit

Permalink
scripts: Improve diversity of changes
Browse files: browse the repository at this point in the history
  • Loading branch information
dtcxzyw committed Sep 17, 2024
1 parent 754b379 commit 0245c3f
Showing 1 changed file with 31 additions and 10 deletions.
41 changes: 31 additions & 10 deletions scripts/filter_pr_changes.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,31 +2,52 @@

import subprocess
import os
import heapq

max_diff_per_file = 500
max_diff_total = 15000
max_file_total = 200
trivial_penalty = 200
diversity_penalty_inc = 30

stats = subprocess.check_output(['git', 'diff', '--numstat']).decode().splitlines()
diffs = []
# TODO: maximize diff diversity
diff_pattern = set()
diffs = dict()
for line in stats:
add, sub, file = line.removesuffix('\n').split()
count = int(add)+int(sub)
if count > max_diff_per_file:
continue
key = (add, sub)
if key in diff_pattern:
continue
diff_pattern.add(key)
diffs.append((file, count))
diffs.sort(key=lambda x: x[1])
if add == sub:
count += trivial_penalty
proj = os.path.basename(os.path.dirname(os.path.dirname(file)))
diff_list = diffs.get(proj, list())
diff_list.append((count, file, proj, int(add), int(sub)))
diffs[proj] = diff_list

diff_heap = []
for list in diffs.values():
list.sort(key=lambda x: x[0])
diff_heap.append(list.pop(0))
heapq.heapify(diff_heap)

diversity_penalty = dict()
diff_pattern = set()
file_count = 0
diff_count = 0
while len(diff_heap) != 0:
cnt, file, proj, add, sub = heapq.heappop(diff_heap)
proj_list = diffs[proj]
if len(proj_list) != 0:
diversity_penalty[proj] = diversity_penalty.get(proj, 0) + diversity_penalty_inc
cnt2, file2, proj2, add2, sub2 = proj_list.pop(0)
cnt2 += diversity_penalty[proj]
heapq.heappush(diff_heap, (cnt2, file2, proj2, add2, sub2))

for file, count in diffs:
key = (add, sub)
if key in diff_pattern:
continue
diff_pattern.add(key)
count = add + sub
if file_count < max_file_total and diff_count + count <= max_diff_total:
file_count += 1
diff_count += count
Expand Down

0 comments on commit 0245c3f

Please sign in to comment.