Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scheduler: optimize QueueSort func when Gang and Barepod Coexists #1977

Merged
merged 1 commit into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions pkg/scheduler/plugins/coscheduling/core/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ type Manager interface {
GetGangSummaries() map[string]*GangSummary
IsGangMinSatisfied(*corev1.Pod) bool
GetChildScheduleCycle(*corev1.Pod) int
GetGangGroupWaitingBoundPodNum(pod *corev1.Pod) int
}

// PodGroupManager defines the scheduling operation called
Expand Down Expand Up @@ -552,3 +553,19 @@ func (pgMgr *PodGroupManager) GetChildScheduleCycle(pod *corev1.Pod) int {

return gang.getChildScheduleCycle(pod)
}

// GetGangGroupWaitingBoundPodNum adds up getGangWaitingPods() for every gang
// listed in the GangGroup of the gang that pod belongs to.
//
// It returns 0 when GetGangByPod finds no gang for pod. Gang IDs in the group
// that cannot be resolved from the cache are skipped.
func (pgMgr *PodGroupManager) GetGangGroupWaitingBoundPodNum(pod *corev1.Pod) int {
	podGang := pgMgr.GetGangByPod(pod)
	if podGang == nil {
		return 0
	}

	total := 0
	for _, gangID := range podGang.GangGroup {
		// NOTE(review): the second argument presumably disables create-on-miss — confirm against the cache implementation.
		member := pgMgr.cache.getGangFromCacheByGangId(gangID, false)
		if member == nil {
			// A group member missing from the cache contributes nothing to the sum.
			continue
		}
		total += member.getGangWaitingPods()
	}
	return total
}
41 changes: 21 additions & 20 deletions pkg/scheduler/plugins/coscheduling/coscheduling.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,28 +135,29 @@ func (cs *Coscheduling) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool {

group1, _ := cs.pgMgr.GetGroupId(podInfo1.Pod)
group2, _ := cs.pgMgr.GetGroupId(podInfo2.Pod)
if group1 != group2 {
return group1 < group2
}

isgang1satisfied := cs.pgMgr.IsGangMinSatisfied(podInfo1.Pod)
isgang2satisfied := cs.pgMgr.IsGangMinSatisfied(podInfo2.Pod)
if isgang1satisfied != isgang2satisfied {
return !isgang1satisfied
}

childScheduleCycle1 := cs.pgMgr.GetChildScheduleCycle(podInfo1.Pod)
childScheduleCycle2 := cs.pgMgr.GetChildScheduleCycle(podInfo2.Pod)
if childScheduleCycle1 != childScheduleCycle2 {
return childScheduleCycle1 < childScheduleCycle2
}

creationTime1 := cs.pgMgr.GetCreatTime(podInfo1)
creationTime2 := cs.pgMgr.GetCreatTime(podInfo2)
if creationTime1.Equal(creationTime2) {
return util.GetId(podInfo1.Pod.Namespace, podInfo1.Pod.Name) < util.GetId(podInfo2.Pod.Namespace, podInfo2.Pod.Name)
waitingBoundPodNum1 := cs.pgMgr.GetGangGroupWaitingBoundPodNum(podInfo1.Pod)
waitingBoundPodNum2 := cs.pgMgr.GetGangGroupWaitingBoundPodNum(podInfo2.Pod)
if waitingBoundPodNum1 != 0 || waitingBoundPodNum2 != 0 {
// At any one time, only the member pods of one podGroup should be assumed, so we prefer the pod that already has assumed siblings; then they can succeed together.
if waitingBoundPodNum1 == 0 || waitingBoundPodNum2 == 0 {
return waitingBoundPodNum1 != 0
}
/*
Two gang groups may both already have some assumed sibling pods.
For example:
1. GroupA has submitted 6 members, and 5 of them are already assumed;
2. then the sixth is deleted;
3. then GroupB submits its pods and 3 of them are already assumed;
4. GroupA submits the sixth pod.
In this case, comparing waitingPodNum makes no sense, so just sort by group id to give a fixed order.
No matter whether the former succeeds or fails, its waitingPodNum will be zeroed, so the deadlock will be avoided.
*/
return group1 < group2
}
return creationTime1.Before(creationTime2)
// If no pod has succeeded, we schedule all pods by RoundRobin to ensure fairness.
// If a time-consuming member pod of a gang fails, its siblings will fail soon (because the scheduling cycle becomes invalid), so there is no need to ensure that all siblings fail together.
return podInfo1.Timestamp.Before(podInfo2.Timestamp)
}

// PreFilter
Expand Down
46 changes: 23 additions & 23 deletions pkg/scheduler/plugins/coscheduling/coscheduling_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -382,12 +382,12 @@ func TestLess(t *testing.T) {
{
name: "equal priority, but p1 is added to schedulingQ earlier than p2",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod1").Priority(highPriority).Label(extension.LabelPodPriority, lowSubPriority).Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod1").Priority(highPriority).Label(extension.LabelPodPriority, lowSubPriority).Obj()),
Timestamp: earltTime,
},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(extension.LabelPodPriority, lowSubPriority).Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(extension.LabelPodPriority, lowSubPriority).Obj()),
Timestamp: lateTime,
},
expected: true, // p1 should be ahead of p2 in the queue
},
Expand All @@ -404,12 +404,12 @@ func TestLess(t *testing.T) {
{
name: "equal priority, p1 is added to schedulingQ earlier than p2",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Obj()),
Timestamp: earltTime,
},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod2").Priority(highPriority).Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod2").Priority(highPriority).Obj()),
Timestamp: lateTime,
},
expected: true, // p1 should be ahead of p2 in the queue
},
Expand Down Expand Up @@ -439,27 +439,27 @@ func TestLess(t *testing.T) {
{
name: "equal priority. p2 is added to schedulingQ earlier than p1, p1 belongs to gangA and p2 belongs to gangB",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod1").Priority(highPriority).Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod1").Priority(highPriority).Obj()),
Timestamp: lateTime,
},
annotations: map[string]string{extension.AnnotationGangName: "gangA"},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
Timestamp: earltTime,
},
expected: true, // p1 should be ahead of p2 in the queue
expected: false, // p2 should be ahead of p1 in the queue
},
{
name: "equal priority and creation time, both belongs to gangB",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
Timestamp: lateTime,
},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
Timestamp: earltTime,
},
expected: true, // p1 should be ahead of p2 in the queue
expected: false,
},
{
name: "equal priority and creation time, both belongs to gangB, childScheduleCycle not equal",
Expand All @@ -478,14 +478,14 @@ func TestLess(t *testing.T) {
{
name: "equal priority and creation time, p1 belongs to gangA that has been satisfied",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangD").Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangD").Obj()),
Timestamp: lateTime,
},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangC_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangC").Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangC_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangC").Obj()),
Timestamp: earltTime,
},
expected: true, // p1 should be ahead of p2 in the queue
expected: false,
},
} {
t.Run(tt.name, func(t *testing.T) {
Expand Down
Loading