Skip to content

Commit

Permalink
scheduler: optimize QueueSort func when Gang and Barepod Coexists
Browse files Browse the repository at this point in the history
Signed-off-by: wangjianyu.wjy <wangjianyu.wjy@alibaba-inc.com>
  • Loading branch information
wangjianyu.wjy committed Mar 28, 2024
1 parent af5f951 commit a57ea6a
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 43 deletions.
17 changes: 17 additions & 0 deletions pkg/scheduler/plugins/coscheduling/core/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ type Manager interface {
GetGangSummaries() map[string]*GangSummary
IsGangMinSatisfied(*corev1.Pod) bool
GetChildScheduleCycle(*corev1.Pod) int
GetGangGroupWaitingPodNum(pod *corev1.Pod) int
}

// PodGroupManager defines the scheduling operation called
Expand Down Expand Up @@ -552,3 +553,19 @@ func (pgMgr *PodGroupManager) GetChildScheduleCycle(pod *corev1.Pod) int {

return gang.getChildScheduleCycle(pod)
}

// GetGangGroupWaitingPodNum returns the sum of getGangWaitingPods() over every
// gang listed in the GangGroup of the gang that pod belongs to.
// It returns 0 when GetGangByPod finds no gang for the pod.
func (pgMgr *PodGroupManager) GetGangGroupWaitingPodNum(pod *corev1.Pod) int {
	podGang := pgMgr.GetGangByPod(pod)
	if podGang == nil {
		return 0
	}

	total := 0
	for _, gangID := range podGang.GangGroup {
		// NOTE(review): the `false` argument presumably means "do not create the
		// gang on a cache miss" — confirm against getGangFromCacheByGangId.
		member := pgMgr.cache.getGangFromCacheByGangId(gangID, false)
		if member == nil {
			// A gang ID that is not in the cache contributes nothing to the sum.
			continue
		}
		total += member.getGangWaitingPods()
	}
	return total
}
35 changes: 15 additions & 20 deletions pkg/scheduler/plugins/coscheduling/coscheduling.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,28 +135,23 @@ func (cs *Coscheduling) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool {

group1, _ := cs.pgMgr.GetGroupId(podInfo1.Pod)
group2, _ := cs.pgMgr.GetGroupId(podInfo2.Pod)
if group1 != group2 {
return group1 < group2
}

isgang1satisfied := cs.pgMgr.IsGangMinSatisfied(podInfo1.Pod)
isgang2satisfied := cs.pgMgr.IsGangMinSatisfied(podInfo2.Pod)
if isgang1satisfied != isgang2satisfied {
return !isgang1satisfied
}

childScheduleCycle1 := cs.pgMgr.GetChildScheduleCycle(podInfo1.Pod)
childScheduleCycle2 := cs.pgMgr.GetChildScheduleCycle(podInfo2.Pod)
if childScheduleCycle1 != childScheduleCycle2 {
return childScheduleCycle1 < childScheduleCycle2
}

creationTime1 := cs.pgMgr.GetCreatTime(podInfo1)
creationTime2 := cs.pgMgr.GetCreatTime(podInfo2)
if creationTime1.Equal(creationTime2) {
return util.GetId(podInfo1.Pod.Namespace, podInfo1.Pod.Name) < util.GetId(podInfo2.Pod.Namespace, podInfo2.Pod.Name)
waitingPodNum1 := cs.pgMgr.GetGangGroupWaitingPodNum(podInfo1.Pod)
waitingPodNum2 := cs.pgMgr.GetGangGroupWaitingPodNum(podInfo2.Pod)
if waitingPodNum1 != 0 || waitingPodNum2 != 0 {
// Only member pods of one podGroup should be assumed at any given time, so we prefer the pod whose siblings are already assumed, so that they can succeed together.
if waitingPodNum1 == 0 || waitingPodNum2 == 0 {
return waitingPodNum1 != 0
}
/*
In some cases, a high-priority pod may interrupt the currently assumed gang pods, so a livelock may occur.
waitingPodNum is no longer meaningful in that case, so we just sort by group id: whether the former succeeds or fails, its waitingPodNum will be reset to zero.
*/
return group1 < group2
}
return creationTime1.Before(creationTime2)
// If no pod has succeeded, we schedule all pods in round-robin order to ensure fairness.
// If a time-consuming member pod of a gang fails, its siblings will fail soon afterwards (because the scheduling cycle becomes invalid), so there is no need to ensure that all siblings fail together.
return podInfo1.Timestamp.Before(podInfo2.Timestamp)
}

// PreFilter
Expand Down
46 changes: 23 additions & 23 deletions pkg/scheduler/plugins/coscheduling/coscheduling_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -382,12 +382,12 @@ func TestLess(t *testing.T) {
{
name: "equal priority, but p1 is added to schedulingQ earlier than p2",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod1").Priority(highPriority).Label(extension.LabelPodPriority, lowSubPriority).Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod1").Priority(highPriority).Label(extension.LabelPodPriority, lowSubPriority).Obj()),
Timestamp: earltTime,
},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(extension.LabelPodPriority, lowSubPriority).Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(extension.LabelPodPriority, lowSubPriority).Obj()),
Timestamp: lateTime,
},
expected: true, // p1 should be ahead of p2 in the queue
},
Expand All @@ -404,12 +404,12 @@ func TestLess(t *testing.T) {
{
name: "equal priority, p1 is added to schedulingQ earlier than p2",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Obj()),
Timestamp: earltTime,
},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod2").Priority(highPriority).Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod2").Priority(highPriority).Obj()),
Timestamp: lateTime,
},
expected: true, // p1 should be ahead of p2 in the queue
},
Expand Down Expand Up @@ -439,27 +439,27 @@ func TestLess(t *testing.T) {
{
name: "equal priority. p2 is added to schedulingQ earlier than p1, p1 belongs to gangA and p2 belongs to gangB",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod1").Priority(highPriority).Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangA_ns).Name("pod1").Priority(highPriority).Obj()),
Timestamp: lateTime,
},
annotations: map[string]string{extension.AnnotationGangName: "gangA"},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
Timestamp: earltTime,
},
expected: true, // p1 should be ahead of p2 in the queue
expected: false, // p2 should be ahead of p1 in the queue
},
{
name: "equal priority and creation time, both belongs to gangB",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
Timestamp: lateTime,
},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangB").Obj()),
Timestamp: earltTime,
},
expected: true, // p1 should be ahead of p2 in the queue
expected: false,
},
{
name: "equal priority and creation time, both belongs to gangB, childScheduleCycle not equal",
Expand All @@ -478,14 +478,14 @@ func TestLess(t *testing.T) {
{
name: "equal priority and creation time, p1 belongs to gangA that has been satisfied",
p1: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangD").Obj()),
InitialAttemptTimestamp: lateTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangB_ns).Name("pod1").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangD").Obj()),
Timestamp: lateTime,
},
p2: &framework.QueuedPodInfo{
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangC_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangC").Obj()),
InitialAttemptTimestamp: earltTime,
PodInfo: framework.NewPodInfo(st.MakePod().Namespace(gangC_ns).Name("pod2").Priority(highPriority).Label(v1alpha1.PodGroupLabel, "gangC").Obj()),
Timestamp: earltTime,
},
expected: true, // p1 should be ahead of p2 in the queue
expected: false,
},
} {
t.Run(tt.name, func(t *testing.T) {
Expand Down

0 comments on commit a57ea6a

Please sign in to comment.