Skip to content

Commit

Permalink
scheduler: optimize QueueSort func when Gang and BarePod Coexists
Browse files Browse the repository at this point in the history
Signed-off-by: wangjianyu.wjy <wangjianyu.wjy@alibaba-inc.com>
  • Loading branch information
wangjianyu.wjy committed Mar 29, 2024
1 parent af5f951 commit e9ab057
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 43 deletions.
17 changes: 17 additions & 0 deletions pkg/scheduler/plugins/coscheduling/core/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ type Manager interface {
GetGangSummaries() map[string]*GangSummary
IsGangMinSatisfied(*corev1.Pod) bool
GetChildScheduleCycle(*corev1.Pod) int
GetGangGroupWaitingBoundPodNum(pod *corev1.Pod) int
}

// PodGroupManager defines the scheduling operation called
Expand Down Expand Up @@ -552,3 +553,19 @@ func (pgMgr *PodGroupManager) GetChildScheduleCycle(pod *corev1.Pod) int {

return gang.getChildScheduleCycle(pod)
}

// GetGangGroupWaitingBoundPodNum sums the values reported by getGangWaitingPods
// for every gang listed in the GangGroup of the gang that pod belongs to.
//
// It returns 0 when GetGangByPod finds no gang for pod. Gang IDs in the group
// that cannot be resolved from the cache are skipped and contribute nothing.
//
// NOTE(review): the count presumably covers member pods that are already
// assumed and are waiting to be bound — confirm against getGangWaitingPods.
func (pgMgr *PodGroupManager) GetGangGroupWaitingBoundPodNum(pod *corev1.Pod) int {
	gang := pgMgr.GetGangByPod(pod)
	if gang == nil {
		return 0
	}

	total := 0
	for _, id := range gang.GangGroup {
		// The second argument is passed as false exactly as before;
		// presumably it prevents creating a missing gang — TODO confirm.
		member := pgMgr.cache.getGangFromCacheByGangId(id, false)
		if member == nil {
			continue
		}
		total += member.getGangWaitingPods()
	}
	return total
}
41 changes: 21 additions & 20 deletions pkg/scheduler/plugins/coscheduling/coscheduling.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,28 +135,29 @@ func (cs *Coscheduling) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool {

group1, _ := cs.pgMgr.GetGroupId(podInfo1.Pod)
group2, _ := cs.pgMgr.GetGroupId(podInfo2.Pod)
if group1 != group2 {
return group1 < group2
}

isgang1satisfied := cs.pgMgr.IsGangMinSatisfied(podInfo1.Pod)
isgang2satisfied := cs.pgMgr.IsGangMinSatisfied(podInfo2.Pod)
if isgang1satisfied != isgang2satisfied {
return !isgang1satisfied
}

childScheduleCycle1 := cs.pgMgr.GetChildScheduleCycle(podInfo1.Pod)
childScheduleCycle2 := cs.pgMgr.GetChildScheduleCycle(podInfo2.Pod)
if childScheduleCycle1 != childScheduleCycle2 {
return childScheduleCycle1 < childScheduleCycle2
}

creationTime1 := cs.pgMgr.GetCreatTime(podInfo1)
creationTime2 := cs.pgMgr.GetCreatTime(podInfo2)
if creationTime1.Equal(creationTime2) {
return util.GetId(podInfo1.Pod.Namespace, podInfo1.Pod.Name) < util.GetId(podInfo2.Pod.Namespace, podInfo2.Pod.Name)
waitingBoundPodNum1 := cs.pgMgr.GetGangGroupWaitingBoundPodNum(podInfo1.Pod)
waitingBoundPodNum2 := cs.pgMgr.GetGangGroupWaitingBoundPodNum(podInfo2.Pod)
if waitingBoundPodNum1 != 0 || waitingBoundPodNum2 != 0 {
// Only the member pods of one podGroup should be assumed at any given time, so we prefer the pod that already has assumed siblings; that way they can succeed together.
if waitingBoundPodNum1 == 0 || waitingBoundPodNum2 == 0 {
return waitingBoundPodNum1 != 0
}
/*
Two gang groups may both already have some assumed sibling pods.
For example:
1. GroupA has submitted 6 members, and 5 of them are already assumed;
2. then the sixth is deleted;
3. then GroupB submits its pods, and 3 of them are already assumed;
4. then GroupA submits the sixth pod again.
In this case, comparing waitingPodNum makes no sense, so we just sort by group id to give a fixed order.
No matter whether the former group succeeds or fails, its waitingPodNum will be zeroed, so the deadlock is avoided.
*/
return group1 < group2
}
return creationTime1.Before(creationTime2)
// If no pod has succeeded, we schedule all pods in round-robin order to ensure fairness.
// If a time-consuming member pod of one gang fails, its siblings will fail soon afterwards (because the scheduling cycle becomes invalid), so there is no need to ensure that all siblings fail together.
return podInfo1.Timestamp.Before(podInfo2.Timestamp)
}

// PreFilter
Expand Down
46 changes: 23 additions & 23 deletions pkg/scheduler/plugins/coscheduling/coscheduling_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -382,12 +382,12 @@ func TestLess(t *testing.T) {
{
name: "equal priority, but p1 is added to schedulingQ earlier than p2",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod1").Priority(highPriority).Label(extension.LabelPodPriority, lowSubPriority).Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod1").Priority(highPriority).Label(extension.LabelPodPriority, lowSubPriority).Obj()),
Timestamp: earltTime,
},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(extension.LabelPodPriority, lowSubPriority).Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(extension.LabelPodPriority, lowSubPriority).Obj()),
Timestamp: lateTime,
},
expected: true, // p1 should be ahead of p2 in the queue
},
Expand All @@ -404,12 +404,12 @@ func TestLess(t *testing.T) {
{
name: "equal priority, p1 is added to schedulingQ earlier than p2",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Obj()),
Timestamp: earltTime,
},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod2").Priority(highPriority).Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod2").Priority(highPriority).Obj()),
Timestamp: lateTime,
},
expected: true, // p1 should be ahead of p2 in the queue
},
Expand Down Expand Up @@ -439,27 +439,27 @@ func TestLess(t *testing.T) {
{
name: "equal priority. p2 is added to schedulingQ earlier than p1, p1 belongs to gangA and p2 belongs to gangB",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod1").Priority(highPriority).Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod1").Priority(highPriority).Obj()),
Timestamp: lateTime,
},
annotations: map[string]string{extension.AnnotationGangName: "gangA"},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
Timestamp: earltTime,
},
expected: true, // p1 should be ahead of p2 in the queue
expected: false, // p2 should be ahead of p1 in the queue
},
{
name: "equal priority and creation time, both belongs to gangB",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
Timestamp: lateTime,
},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
Timestamp: earltTime,
},
expected: true, // p1 should be ahead of p2 in the queue
expected: false,
},
{
name: "equal priority and creation time, both belongs to gangB, childScheduleCycle not equal",
Expand All @@ -478,14 +478,14 @@ func TestLess(t *testing.T) {
{
name: "equal priority and creation time, p1 belongs to gangA that has been satisfied",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangD").Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangD").Obj()),
Timestamp: lateTime,
},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangC_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangC").Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangC_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangC").Obj()),
Timestamp: earltTime,
},
expected: true, // p1 should be ahead of p2 in the queue
expected: false,
},
} {
t.Run(tt.name, func(t *testing.T) {
Expand Down

0 comments on commit e9ab057

Please sign in to comment.