From cc6262e6745933aeb72c4e762104e3d0bfe7e26f Mon Sep 17 00:00:00 2001 From: lucming <2876757716@qq.com> Date: Fri, 31 May 2024 11:42:38 +0800 Subject: [PATCH] koord-scheduler: add a parameter to mark whether scheduling is allowed on node with expired nodemetric Signed-off-by: lucming <2876757716@qq.com> --- pkg/scheduler/apis/config/types.go | 2 ++ pkg/scheduler/apis/config/v1beta3/types.go | 2 ++ pkg/scheduler/apis/config/v1beta3/zz_generated.conversion.go | 2 ++ pkg/scheduler/apis/config/v1beta3/zz_generated.deepcopy.go | 5 +++++ pkg/scheduler/apis/config/zz_generated.deepcopy.go | 5 +++++ pkg/scheduler/plugins/loadaware/load_aware.go | 4 ++++ 6 files changed, 20 insertions(+) diff --git a/pkg/scheduler/apis/config/types.go b/pkg/scheduler/apis/config/types.go index 2545120a2..51d47eab3 100644 --- a/pkg/scheduler/apis/config/types.go +++ b/pkg/scheduler/apis/config/types.go @@ -37,6 +37,8 @@ type LoadAwareSchedulingArgs struct { // When NodeMetrics expired, the node is considered abnormal. // Default is 180 seconds. NodeMetricExpirationSeconds *int64 + // EnableScheduleWhenNodeMetricsExpired indicates whether pods are allowed to be scheduled onto nodes with expired nodeMetrics. + EnableScheduleWhenNodeMetricsExpired *bool // ResourceWeights indicates the weights of resources. // The weights of CPU and Memory are both 1 by default. ResourceWeights map[corev1.ResourceName]int64 diff --git a/pkg/scheduler/apis/config/v1beta3/types.go b/pkg/scheduler/apis/config/v1beta3/types.go index b194264fb..513206196 100644 --- a/pkg/scheduler/apis/config/v1beta3/types.go +++ b/pkg/scheduler/apis/config/v1beta3/types.go @@ -36,6 +36,8 @@ type LoadAwareSchedulingArgs struct { // When NodeMetrics expired, the node is considered abnormal. // Default is 180 seconds. NodeMetricExpirationSeconds *int64 `json:"nodeMetricExpirationSeconds,omitempty"` + // EnableScheduleWhenNodeMetricsExpired indicates whether pods are allowed to be scheduled onto nodes with expired nodeMetrics. 
+ EnableScheduleWhenNodeMetricsExpired *bool `json:"enableScheduleWhenNodeMetricsExpired,omitempty"` // ResourceWeights indicates the weights of resources. // The weights of CPU and Memory are both 1 by default. ResourceWeights map[corev1.ResourceName]int64 `json:"resourceWeights,omitempty"` diff --git a/pkg/scheduler/apis/config/v1beta3/zz_generated.conversion.go b/pkg/scheduler/apis/config/v1beta3/zz_generated.conversion.go index 1340a4e2b..3d64d59a3 100644 --- a/pkg/scheduler/apis/config/v1beta3/zz_generated.conversion.go +++ b/pkg/scheduler/apis/config/v1beta3/zz_generated.conversion.go @@ -277,6 +277,7 @@ func Convert_config_LoadAwareSchedulingAggregatedArgs_To_v1beta3_LoadAwareSchedu func autoConvert_v1beta3_LoadAwareSchedulingArgs_To_config_LoadAwareSchedulingArgs(in *LoadAwareSchedulingArgs, out *config.LoadAwareSchedulingArgs, s conversion.Scope) error { out.FilterExpiredNodeMetrics = (*bool)(unsafe.Pointer(in.FilterExpiredNodeMetrics)) out.NodeMetricExpirationSeconds = (*int64)(unsafe.Pointer(in.NodeMetricExpirationSeconds)) + out.EnableScheduleWhenNodeMetricsExpired = (*bool)(unsafe.Pointer(in.EnableScheduleWhenNodeMetricsExpired)) out.ResourceWeights = *(*map[corev1.ResourceName]int64)(unsafe.Pointer(&in.ResourceWeights)) out.UsageThresholds = *(*map[corev1.ResourceName]int64)(unsafe.Pointer(&in.UsageThresholds)) out.ProdUsageThresholds = *(*map[corev1.ResourceName]int64)(unsafe.Pointer(&in.ProdUsageThresholds)) @@ -300,6 +301,7 @@ func autoConvert_v1beta3_LoadAwareSchedulingArgs_To_config_LoadAwareSchedulingAr func autoConvert_config_LoadAwareSchedulingArgs_To_v1beta3_LoadAwareSchedulingArgs(in *config.LoadAwareSchedulingArgs, out *LoadAwareSchedulingArgs, s conversion.Scope) error { out.FilterExpiredNodeMetrics = (*bool)(unsafe.Pointer(in.FilterExpiredNodeMetrics)) out.NodeMetricExpirationSeconds = (*int64)(unsafe.Pointer(in.NodeMetricExpirationSeconds)) + out.EnableScheduleWhenNodeMetricsExpired = 
(*bool)(unsafe.Pointer(in.EnableScheduleWhenNodeMetricsExpired)) out.ResourceWeights = *(*map[corev1.ResourceName]int64)(unsafe.Pointer(&in.ResourceWeights)) out.UsageThresholds = *(*map[corev1.ResourceName]int64)(unsafe.Pointer(&in.UsageThresholds)) out.ProdUsageThresholds = *(*map[corev1.ResourceName]int64)(unsafe.Pointer(&in.ProdUsageThresholds)) diff --git a/pkg/scheduler/apis/config/v1beta3/zz_generated.deepcopy.go b/pkg/scheduler/apis/config/v1beta3/zz_generated.deepcopy.go index 78abb035c..b19989107 100644 --- a/pkg/scheduler/apis/config/v1beta3/zz_generated.deepcopy.go +++ b/pkg/scheduler/apis/config/v1beta3/zz_generated.deepcopy.go @@ -209,6 +209,11 @@ func (in *LoadAwareSchedulingArgs) DeepCopyInto(out *LoadAwareSchedulingArgs) { *out = new(int64) **out = **in } + if in.EnableScheduleWhenNodeMetricsExpired != nil { + in, out := &in.EnableScheduleWhenNodeMetricsExpired, &out.EnableScheduleWhenNodeMetricsExpired + *out = new(bool) + **out = **in + } if in.ResourceWeights != nil { in, out := &in.ResourceWeights, &out.ResourceWeights *out = make(map[corev1.ResourceName]int64, len(*in)) diff --git a/pkg/scheduler/apis/config/zz_generated.deepcopy.go b/pkg/scheduler/apis/config/zz_generated.deepcopy.go index d83136de2..97dac7428 100644 --- a/pkg/scheduler/apis/config/zz_generated.deepcopy.go +++ b/pkg/scheduler/apis/config/zz_generated.deepcopy.go @@ -163,6 +163,11 @@ func (in *LoadAwareSchedulingArgs) DeepCopyInto(out *LoadAwareSchedulingArgs) { *out = new(int64) **out = **in } + if in.EnableScheduleWhenNodeMetricsExpired != nil { + in, out := &in.EnableScheduleWhenNodeMetricsExpired, &out.EnableScheduleWhenNodeMetricsExpired + *out = new(bool) + **out = **in + } if in.ResourceWeights != nil { in, out := &in.ResourceWeights, &out.ResourceWeights *out = make(map[v1.ResourceName]int64, len(*in)) diff --git a/pkg/scheduler/plugins/loadaware/load_aware.go b/pkg/scheduler/plugins/loadaware/load_aware.go index 5baa15102..98e784846 100644 --- 
a/pkg/scheduler/plugins/loadaware/load_aware.go +++ b/pkg/scheduler/plugins/loadaware/load_aware.go @@ -42,6 +42,7 @@ import ( const ( Name = "LoadAwareScheduling" + ErrReasonNodeMetricExpired = "node(s) nodeMetric expired" ErrReasonUsageExceedThreshold = "node(s) %s usage exceed threshold" ErrReasonAggregatedUsageExceedThreshold = "node(s) %s aggregated usage exceed threshold" ErrReasonFailedEstimatePod @@ -143,6 +144,9 @@ func (p *Plugin) Filter(ctx context.Context, state *framework.CycleState, pod *c if p.args.FilterExpiredNodeMetrics != nil && *p.args.FilterExpiredNodeMetrics && p.args.NodeMetricExpirationSeconds != nil && isNodeMetricExpired(nodeMetric, *p.args.NodeMetricExpirationSeconds) { + if p.args.EnableScheduleWhenNodeMetricsExpired != nil && !*p.args.EnableScheduleWhenNodeMetricsExpired { + return framework.NewStatus(framework.Unschedulable, ErrReasonNodeMetricExpired) + } return nil }