Skip to content

Commit

Permalink
scheduler: recover gang check in preFilter (#2217)
Browse files Browse the repository at this point in the history
Signed-off-by: wangjianyu.wjy <wangjianyu.wjy@alibaba-inc.com>
Co-authored-by: wangjianyu.wjy <wangjianyu.wjy@alibaba-inc.com>
  • Loading branch information
ZiMengSheng and wangjianyu.wjy authored Sep 30, 2024
1 parent 83267af commit 0466bf3
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 19 deletions.
26 changes: 23 additions & 3 deletions pkg/scheduler/plugins/coscheduling/core/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,22 +259,42 @@ func (pgMgr *PodGroupManager) PreEnqueue(ctx context.Context, pod *corev1.Pod) (
}

// PreFilter
// i.Check whether the Gang has met the scheduleCycleValid check, and reject the pod if negative(only Strict mode ).
// ii.Check whether the Gang is OnceResourceSatisfied
// iii.Check whether the Gang has met the scheduleCycleValid check, and reject the pod if negative(only Strict mode ).
// i.Check whether the number of children in the Gang has reached the Gang's minimum number, and reject the pod if not.
// ii.Check whether the Gang has been initialized, and reject the pod if it has not.
// iii.Check whether the Gang's match policy is once-satisfied and the Gang is OnceResourceSatisfied; if so, the pod passes PreFilter directly.
// iv.Check whether the Gang has met the scheduleCycleValid check, and reject the pod if negative (Strict mode only).
// v.Try to update scheduleCycle, scheduleCycleValid, childrenScheduleRoundMap as mentioned above.
func (pgMgr *PodGroupManager) PreFilter(ctx context.Context, state *framework.CycleState, pod *corev1.Pod) (err error) {
if !util.IsPodNeedGang(pod) {
return nil
}
preFilterState := &stateData{skipReject: false, skipSetCycleInvalid: false}
state.Write(stateKey, preFilterState)
gang := pgMgr.GetGangByPod(pod)
if gang == nil {
preFilterState.skipSetCycleInvalid = true
return fmt.Errorf("can't find gang, gangName: %v, podName: %v", util.GetId(pod.Namespace, util.GetGangNameByPod(pod)),
util.GetId(pod.Namespace, pod.Name))
}

// check if gang is initialized
if !gang.HasGangInit {
preFilterState.skipSetCycleInvalid = true
return fmt.Errorf("gang has not init, gangName: %v, podName: %v", gang.Name,
util.GetId(pod.Namespace, pod.Name))
}
// resourceSatisfied means pod will directly pass the PreFilter
if gang.getGangMatchPolicy() == extension.GangMatchPolicyOnceSatisfied && gang.isGangOnceResourceSatisfied() {
return nil
}

// check minNum
if gang.getChildrenNum() < gang.getGangMinNum() {
preFilterState.skipSetCycleInvalid = true
return fmt.Errorf("gang child pod not collect enough, gangName: %v, podName: %v", gang.Name,
util.GetId(pod.Namespace, pod.Name))
}

if pgMgr.args != nil && pgMgr.args.SkipCheckScheduleCycle {
return nil
}
Expand Down
89 changes: 73 additions & 16 deletions pkg/scheduler/plugins/coscheduling/core/core_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,19 @@ func TestPlugin_PreFilter(t *testing.T) {
pods: []*corev1.Pod{},
expectedErrorMessage: "",
},
{
name: "pod belongs to a non-existing pg",
pod: st.MakePod().Name("pod2").UID("pod2").Namespace("gangA_ns").Label(v1alpha1.PodGroupLabel, "wenshiqi222").Obj(),
expectedErrorMessage: "gang has not init, gangName: gangA_ns/wenshiqi222, podName: gangA_ns/pod2",
expectedChildCycleMap: map[string]int{
"gangA_ns/pod2": 1,
},
expectedScheduleCycleValid: true,
expectedScheduleCycle: 1,
expectStateData: &stateData{
skipSetCycleInvalid: true,
},
},
{
name: "gang ResourceSatisfied",
pod: st.MakePod().Name("podq").UID("podq").Namespace("gangq_ns").Label(v1alpha1.PodGroupLabel, "gangq").Obj(),
Expand All @@ -409,6 +422,21 @@ func TestPlugin_PreFilter(t *testing.T) {
resourceSatisfied: true,
expectStateData: &stateData{},
},
{
name: "pod count less than minMember",
pod: st.MakePod().Name("pod3").UID("pod3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(),
pods: []*corev1.Pod{
st.MakePod().Name("pod3-1").UID("pod3-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(),
},
pgs: makePg("ganga", "ganga_ns", 4, &gangACreatedTime, nil),
expectedErrorMessage: "gang child pod not collect enough, gangName: ganga_ns/ganga, podName: ganga_ns/pod3",
expectedScheduleCycle: 1,
expectedChildCycleMap: map[string]int{},
expectedScheduleCycleValid: true,
expectStateData: &stateData{
skipSetCycleInvalid: true,
},
},
{
name: "pods count equal with minMember,but is NonStrictMode",
pod: st.MakePod().Name("pod5").UID("pod5").Namespace("gangb_ns").Label(v1alpha1.PodGroupLabel, "gangb").Obj(),
Expand All @@ -423,10 +451,14 @@ func TestPlugin_PreFilter(t *testing.T) {
expectStateData: &stateData{},
},
{
name: "due to reschedule pod6's podScheduleCycle is equal with the gangScheduleCycle",
pod: st.MakePod().Name("pod6").UID("pod6").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
pods: []*corev1.Pod{},
pgs: makePg("gangc", "ganga_ns", 1, &gangACreatedTime, nil),
name: "due to reschedule pod6's podScheduleCycle is equal with the gangScheduleCycle",
pod: st.MakePod().Name("pod6").UID("pod6").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
pods: []*corev1.Pod{
st.MakePod().Name("pod6-1").UID("pod6-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
st.MakePod().Name("pod6-2").UID("pod6-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
st.MakePod().Name("pod6-3").UID("pod6-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
},
pgs: makePg("gangc", "ganga_ns", 4, &gangACreatedTime, nil),
shouldSetCycleEqualWithGlobal: true,
totalNum: 5,
expectedScheduleCycle: 1,
Expand All @@ -442,9 +474,9 @@ func TestPlugin_PreFilter(t *testing.T) {
pod: st.MakePod().Name("pod6").UID("pod6").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").
NominatedNodeName("N1").Obj(),
pods: []*corev1.Pod{
st.MakePod().Name("pod6-1").UID("pod6-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").NominatedNodeName("N1").Obj(),
st.MakePod().Name("pod6-2").UID("pod6-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").NominatedNodeName("N1").Obj(),
st.MakePod().Name("pod6-3").UID("pod6-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").NominatedNodeName("N1").Obj(),
st.MakePod().Name("pod6-1").UID("pod6-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
st.MakePod().Name("pod6-2").UID("pod6-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
st.MakePod().Name("pod6-3").UID("pod6-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
},
pgs: makePg("gangc", "ganga_ns", 4, &gangACreatedTime, nil),
shouldSetCycleEqualWithGlobal: true,
Expand All @@ -461,10 +493,14 @@ func TestPlugin_PreFilter(t *testing.T) {
expectStateData: &stateData{},
},
{
name: "pods count equal with minMember,is StrictMode,but the gang's scheduleCycle is not valid due to pre pod Filter Failed",
pod: st.MakePod().Name("pod7").UID("pod7").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
pods: []*corev1.Pod{},
pgs: makePg("gangd", "ganga_ns", 1, &gangACreatedTime, nil),
name: "pods count equal with minMember,is StrictMode,but the gang's scheduleCycle is not valid due to pre pod Filter Failed",
pod: st.MakePod().Name("pod7").UID("pod7").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
pods: []*corev1.Pod{
st.MakePod().Name("pod7-1").UID("pod7-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
st.MakePod().Name("pod7-2").UID("pod7-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
st.MakePod().Name("pod7-3").UID("pod7-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
},
pgs: makePg("gangd", "ganga_ns", 4, &gangACreatedTime, nil),
expectedScheduleCycle: 1,
expectedChildCycleMap: map[string]int{
"ganga_ns/pod7": 1,
Expand All @@ -477,10 +513,14 @@ func TestPlugin_PreFilter(t *testing.T) {
},
},
{
name: "pods count equal with minMember,is StrictMode, disable check scheduleCycle even if the gang's scheduleCycle is not valid",
pod: st.MakePod().Name("pod7").UID("pod7").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
pods: []*corev1.Pod{},
pgs: makePg("gangd", "ganga_ns", 1, &gangACreatedTime, nil),
name: "pods count equal with minMember,is StrictMode, disable check scheduleCycle even if the gang's scheduleCycle is not valid",
pod: st.MakePod().Name("pod7").UID("pod7").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
pods: []*corev1.Pod{
st.MakePod().Name("pod7-1").UID("pod7-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
st.MakePod().Name("pod7-2").UID("pod7-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
st.MakePod().Name("pod7-3").UID("pod7-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
},
pgs: makePg("gangd", "ganga_ns", 4, &gangACreatedTime, nil),
expectedScheduleCycle: 1,
expectedChildCycleMap: map[string]int{
"ganga_ns/pod7": 1,
Expand All @@ -491,6 +531,24 @@ func TestPlugin_PreFilter(t *testing.T) {
shouldSkipCheckScheduleCycle: true,
expectStateData: &stateData{},
},
{
name: "pods count equal with minMember,is StrictMode,scheduleCycle valid,but childrenNum is not reach to total num",
pod: st.MakePod().Name("pod8").UID("pod8").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gange").Obj(),
pods: []*corev1.Pod{
st.MakePod().Name("pod8-1").UID("pod8-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gange").Obj(),
st.MakePod().Name("pod8-2").UID("pod8-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gange").Obj(),
st.MakePod().Name("pod8-3").UID("pod8-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gange").Obj(),
},
pgs: makePg("gange", "ganga_ns", 4, &gangACreatedTime, nil),
totalNum: 5,
expectedScheduleCycle: 1,
expectedChildCycleMap: map[string]int{
"ganga_ns/pod8": 1,
},
expectedScheduleCycleValid: true,
expectedErrorMessage: "",
expectStateData: &stateData{},
},
{
name: "pods count more than minMember,is StrictMode,scheduleCycle valid,and childrenNum reach to total num",
pod: st.MakePod().Name("pod9").UID("pod9").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(),
Expand All @@ -500,7 +558,6 @@ func TestPlugin_PreFilter(t *testing.T) {
st.MakePod().Name("pod9-3").UID("pod9-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(),
st.MakePod().Name("pod9-4").UID("pod9-4").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(),
},
pgs: makePg("ganga", "ganga_ns", 1, &gangACreatedTime, nil),
totalNum: 5,
expectedScheduleCycle: 1,
expectedChildCycleMap: map[string]int{
Expand Down

0 comments on commit 0466bf3

Please sign in to comment.