From ba58826a5374f04571ce2ce6e7bb31dd5412dd92 Mon Sep 17 00:00:00 2001 From: zengwang1 Date: Mon, 8 Apr 2024 20:23:10 +0800 Subject: [PATCH] scheduler: add new pod estimate in loadaware plugin Signed-off-by: zwForrest <756495135@qq.com> --- pkg/scheduler/plugins/loadaware/load_aware.go | 170 ++++---- .../plugins/loadaware/load_aware_test.go | 402 ++++++++++++++++++ 2 files changed, 473 insertions(+), 99 deletions(-) diff --git a/pkg/scheduler/plugins/loadaware/load_aware.go b/pkg/scheduler/plugins/loadaware/load_aware.go index 39b2928151..b2179257db 100644 --- a/pkg/scheduler/plugins/loadaware/load_aware.go +++ b/pkg/scheduler/plugins/loadaware/load_aware.go @@ -145,112 +145,42 @@ func (p *Plugin) Filter(ctx context.Context, state *framework.CycleState, pod *c p.args.NodeMetricExpirationSeconds != nil && isNodeMetricExpired(nodeMetric, *p.args.NodeMetricExpirationSeconds) { return nil } - - filterProfile := generateUsageThresholdsFilterProfile(node, p.args) - if len(filterProfile.ProdUsageThresholds) > 0 && extension.GetPodPriorityClassWithDefault(pod) == extension.PriorityProd { - status := p.filterProdUsage(node, nodeMetric, filterProfile.ProdUsageThresholds) - if !status.IsSuccess() { - return status - } - } else { - var usageThresholds map[corev1.ResourceName]int64 - if filterProfile.AggregatedUsage != nil { - usageThresholds = filterProfile.AggregatedUsage.UsageThresholds - } else { - usageThresholds = filterProfile.UsageThresholds - } - if len(usageThresholds) > 0 { - status := p.filterNodeUsage(node, nodeMetric, filterProfile) - if !status.IsSuccess() { - return status - } - } + if nodeMetric.Status.NodeMetric == nil { + klog.Warningf("nodeMetrics(%s) should not be nil.", node.Name) + return nil } - return nil -} - -func (p *Plugin) filterNodeUsage(node *corev1.Node, nodeMetric *slov1alpha1.NodeMetric, filterProfile *usageThresholdsFilterProfile) *framework.Status { - if nodeMetric.Status.NodeMetric == nil { + allocatable, err := p.estimator.EstimateNode(node) + if err != nil { + klog.ErrorS(err, "Estimated node allocatable failed!", "node", node.Name) return nil } + filterProfile := generateUsageThresholdsFilterProfile(node, p.args) + prodPod := len(filterProfile.ProdUsageThresholds) > 0 && extension.GetPodPriorityClassWithDefault(pod) == extension.PriorityProd + var nodeUsage *slov1alpha1.ResourceMap var usageThresholds map[corev1.ResourceName]int64 - if filterProfile.AggregatedUsage != nil { - usageThresholds = filterProfile.AggregatedUsage.UsageThresholds + if prodPod { + usageThresholds = filterProfile.ProdUsageThresholds } else { - usageThresholds = filterProfile.UsageThresholds - } - - for resourceName, threshold := range usageThresholds { - if threshold == 0 { - continue - } - allocatable, err := p.estimator.EstimateNode(node) - if err != nil { - klog.ErrorS(err, "Failed to EstimateNode", "node", node.Name) - return nil - } - total := allocatable[resourceName] - if total.IsZero() { - continue - } - // TODO(joseph): maybe we should estimate the Pod that just be scheduled that have not reported - var nodeUsage *slov1alpha1.ResourceMap if filterProfile.AggregatedUsage != nil { nodeUsage = getTargetAggregatedUsage( nodeMetric, filterProfile.AggregatedUsage.UsageAggregatedDuration, filterProfile.AggregatedUsage.UsageAggregationType, ) + usageThresholds = filterProfile.AggregatedUsage.UsageThresholds } else { nodeUsage = &nodeMetric.Status.NodeMetric.NodeUsage - } - if nodeUsage == nil { - continue - } - - used := nodeUsage.ResourceList[resourceName] - usage := int64(math.Round(float64(used.MilliValue()) / float64(total.MilliValue()) * 100)) - if usage >= threshold { - reason := ErrReasonUsageExceedThreshold - if filterProfile.AggregatedUsage != nil { - reason = ErrReasonAggregatedUsageExceedThreshold - } - return framework.NewStatus(framework.Unschedulable, fmt.Sprintf(reason, resourceName)) + usageThresholds = filterProfile.UsageThresholds } } - return nil -} - -func (p *Plugin) filterProdUsage(node *corev1.Node, nodeMetric *slov1alpha1.NodeMetric, prodUsageThresholds map[corev1.ResourceName]int64) *framework.Status { - if len(nodeMetric.Status.PodsMetric) == 0 { + estimatedUsed, err := p.GetEstimatedUsed(node.Name, nodeMetric, pod, nodeUsage, prodPod) + if err != nil { + klog.ErrorS(err, "GetEstimatedUsed failed!", "node", node.Name) return nil } - - // TODO(joseph): maybe we should estimate the Pod that just be scheduled that have not reported - podMetrics := buildPodMetricMap(p.podLister, nodeMetric, true) - prodPodUsages, _ := sumPodUsages(podMetrics, nil) - for resourceName, threshold := range prodUsageThresholds { - if threshold == 0 { - continue - } - allocatable, err := p.estimator.EstimateNode(node) - if err != nil { - klog.ErrorS(err, "Failed to EstimateNode", "node", node.Name) - return nil - } - total := allocatable[resourceName] - if total.IsZero() { - continue - } - used := prodPodUsages[resourceName] - usage := int64(math.Round(float64(used.MilliValue()) / float64(total.MilliValue()) * 100)) - if usage >= threshold { - return framework.NewStatus(framework.Unschedulable, fmt.Sprintf(ErrReasonUsageExceedThreshold, resourceName)) - } - } - return nil + return filterNodeUsage(usageThresholds, estimatedUsed, allocatable, prodPod, filterProfile) } func (p *Plugin) ScoreExtensions() framework.ScoreExtensions { @@ -287,13 +217,44 @@ func (p *Plugin) Score(ctx context.Context, state *framework.CycleState, pod *co if p.args.NodeMetricExpirationSeconds != nil && isNodeMetricExpired(nodeMetric, *p.args.NodeMetricExpirationSeconds) { return 0, nil } + if nodeMetric.Status.NodeMetric == nil { + klog.Warningf("nodeMetrics(%s) should not be nil.", node.Name) + return 0, nil + } prodPod := extension.GetPodPriorityClassWithDefault(pod) == extension.PriorityProd && p.args.ScoreAccordingProdUsage + var nodeUsage *slov1alpha1.ResourceMap + if !prodPod { + if scoreWithAggregation(p.args.Aggregated) { + nodeUsage = getTargetAggregatedUsage(nodeMetric, &p.args.Aggregated.ScoreAggregatedDuration, p.args.Aggregated.ScoreAggregationType) + } else { + nodeUsage = &nodeMetric.Status.NodeMetric.NodeUsage + } + } + estimatedUsed, err := p.GetEstimatedUsed(nodeName, nodeMetric, pod, nodeUsage, prodPod) + if err != nil { + klog.ErrorS(err, "GetEstimatedUsed failed!", "node", node.Name) + return 0, nil + } + + allocatable, err := p.estimator.EstimateNode(node) + if err != nil { + klog.ErrorS(err, "Estimated node allocatable failed!", "node", node.Name) + return 0, nil + } + score := loadAwareSchedulingScorer(p.args.ResourceWeights, estimatedUsed, allocatable) + return score, nil +} + +func (p *Plugin) GetEstimatedUsed(nodeName string, nodeMetric *slov1alpha1.NodeMetric, pod *corev1.Pod, nodeUsage *slov1alpha1.ResourceMap, prodPod bool) (map[corev1.ResourceName]int64, error) { + if nodeMetric == nil { + return nil, nil + } podMetrics := buildPodMetricMap(p.podLister, nodeMetric, prodPod) estimatedUsed, err := p.estimator.EstimatePod(pod) if err != nil { - return 0, nil + return nil, err } assignedPodEstimatedUsed, estimatedPods := p.estimatedAssignedPodUsed(nodeName, nodeMetric, podMetrics, prodPod) for resourceName, value := range assignedPodEstimatedUsed { @@ -306,12 +267,6 @@ func (p *Plugin) Score(ctx context.Context, state *framework.CycleState, pod *co } } else { if nodeMetric.Status.NodeMetric != nil { - var nodeUsage *slov1alpha1.ResourceMap - if scoreWithAggregation(p.args.Aggregated) { - nodeUsage = getTargetAggregatedUsage(nodeMetric, &p.args.Aggregated.ScoreAggregatedDuration, p.args.Aggregated.ScoreAggregationType) - } else { - nodeUsage = &nodeMetric.Status.NodeMetric.NodeUsage - } if nodeUsage != nil { for resourceName, quantity := range nodeUsage.ResourceList { if q := estimatedPodActualUsages[resourceName]; !q.IsZero() { @@ -325,13 +280,30 @@ func (p *Plugin) Score(ctx context.Context, state *framework.CycleState, pod *co } } } + return estimatedUsed, nil +} - allocatable, err := p.estimator.EstimateNode(node) - if err != nil { - return 0, nil +func filterNodeUsage(usageThresholds, estimatedUsed map[corev1.ResourceName]int64, allocatable corev1.ResourceList, prodPod bool, filterProfile *usageThresholdsFilterProfile) *framework.Status { + for resourceName, value := range usageThresholds { + if value == 0 { + continue + } + total := getResourceValue(resourceName, allocatable[resourceName]) + if total == 0 { + continue + } + usage := int64(math.Round(float64(estimatedUsed[resourceName]) / float64(total) * 100)) + if usage <= value { + continue + } + + reason := ErrReasonUsageExceedThreshold + if !prodPod && filterProfile.AggregatedUsage != nil { + reason = ErrReasonAggregatedUsageExceedThreshold + } + return framework.NewStatus(framework.Unschedulable, fmt.Sprintf(reason, resourceName)) } - score := loadAwareSchedulingScorer(p.args.ResourceWeights, estimatedUsed, allocatable) - return score, nil + return nil } func (p *Plugin) estimatedAssignedPodUsed(nodeName string, nodeMetric *slov1alpha1.NodeMetric, podMetrics map[string]corev1.ResourceList, filterProdPod bool) (map[corev1.ResourceName]int64, sets.String) { diff --git a/pkg/scheduler/plugins/loadaware/load_aware_test.go b/pkg/scheduler/plugins/loadaware/load_aware_test.go index 5e8d6f3645..c6819ece67 100644 --- a/pkg/scheduler/plugins/loadaware/load_aware_test.go +++ b/pkg/scheduler/plugins/loadaware/load_aware_test.go @@ -266,6 +266,7 @@ func TestFilterUsage(t *testing.T) { aggregated *v1beta2.LoadAwareSchedulingAggregatedArgs customUsageThresholds map[corev1.ResourceName]int64 customProdUsageThresholds map[corev1.ResourceName]int64 + assignedPod []*podAssignInfo customAggregatedUsage *extension.CustomAggregatedUsage nodeName string nodeMetric *slov1alpha1.NodeMetric @@ -800,6 +801,364 @@ func TestFilterUsage(t *testing.T) { testPod: schedulertesting.MakePod().Namespace("default").Name("test-pod").Priority(extension.PriorityProdValueMax).OwnerReference("test-daemonset", schema.GroupVersionKind{Group: "apps", Version: "v1", Kind: "DaemonSet"}).Obj(), wantStatus: nil, }, + { + name: "filter prod cpu usage with new pod request configuration", + nodeName: "test-node-1", + usageThresholds: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 65, + corev1.ResourceMemory: 100, + }, + prodUsageThresholds: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 60, + corev1.ResourceMemory: 100, + }, + nodeMetric: &slov1alpha1.NodeMetric{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-1", + }, + Spec: slov1alpha1.NodeMetricSpec{ + CollectPolicy: &slov1alpha1.NodeMetricCollectPolicy{ + ReportIntervalSeconds: pointer.Int64(60), + }, + }, + Status: slov1alpha1.NodeMetricStatus{ + UpdateTime: &metav1.Time{ + Time: time.Now(), + }, + NodeMetric: &slov1alpha1.NodeMetricInfo{ + NodeUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("53"), + corev1.ResourceMemory: resource.MustParse("500Gi"), + }, + }, + }, + PodsMetric: []*slov1alpha1.PodMetricInfo{ + { + Namespace: "default", + Name: "prod-pod-1", + PodUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("30"), + corev1.ResourceMemory: resource.MustParse("100Gi"), + }, + }, + }, + { + Namespace: "default", + Name: "prod-pod-2", + PodUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("23"), + corev1.ResourceMemory: resource.MustParse("200Gi"), + }, + }, + }, + }, + }, + }, + pods: []*corev1.Pod{ + schedulertesting.MakePod().Namespace("default").Name("prod-pod-1").Priority(extension.PriorityProdValueMax).Obj(), + schedulertesting.MakePod().Namespace("default").Name("prod-pod-2").Priority(extension.PriorityProdValueMax).Obj(), + }, + testPod: schedulertesting.MakePod().Namespace("default").Name("prod-pod-3").Req(map[corev1.ResourceName]string{corev1.ResourceCPU: "20", corev1.ResourceMemory: "100Gi"}).Priority(extension.PriorityProdValueMax).Obj(), + wantStatus: framework.NewStatus(framework.Unschedulable, fmt.Sprintf(ErrReasonUsageExceedThreshold, corev1.ResourceCPU)), + }, + { + name: "filter mid cpu usage with new pod request configuration", + nodeName: "test-node-1", + usageThresholds: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 65, + corev1.ResourceMemory: 100, + }, + prodUsageThresholds: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 60, + corev1.ResourceMemory: 100, + }, + nodeMetric: &slov1alpha1.NodeMetric{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-1", + }, + Spec: slov1alpha1.NodeMetricSpec{ + CollectPolicy: &slov1alpha1.NodeMetricCollectPolicy{ + ReportIntervalSeconds: pointer.Int64(60), + }, + }, + Status: slov1alpha1.NodeMetricStatus{ + UpdateTime: &metav1.Time{ + Time: time.Now(), + }, + NodeMetric: &slov1alpha1.NodeMetricInfo{ + NodeUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("53"), + corev1.ResourceMemory: resource.MustParse("300Gi"), + }, + }, + }, + PodsMetric: []*slov1alpha1.PodMetricInfo{ + { + Namespace: "default", + Name: "prod-pod-1", + PodUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("30"), + corev1.ResourceMemory: resource.MustParse("100Gi"), + }, + }, + }, + { + Namespace: "default", + Name: "prod-pod-2", + PodUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("23"), + corev1.ResourceMemory: resource.MustParse("200Gi"), + }, + }, + }, + }, + }, + }, + pods: []*corev1.Pod{ + schedulertesting.MakePod().Namespace("default").Name("prod-pod-1").Priority(extension.PriorityProdValueMax).Obj(), + schedulertesting.MakePod().Namespace("default").Name("prod-pod-2").Priority(extension.PriorityProdValueMax).Obj(), + }, + testPod: schedulertesting.MakePod().Namespace("default").Name("prod-pod-3").Req(map[corev1.ResourceName]string{extension.MidCPU: "12k", extension.MidMemory: "100Gi"}).Priority(extension.PriorityMidValueMax).Obj(), + wantStatus: framework.NewStatus(framework.Unschedulable, fmt.Sprintf(ErrReasonUsageExceedThreshold, corev1.ResourceCPU)), + }, + { + name: "filter memory usage with new pod request configuration", + nodeName: "test-node-1", + usageThresholds: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 65, + corev1.ResourceMemory: 85, + }, + prodUsageThresholds: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 60, + corev1.ResourceMemory: 80, + }, + nodeMetric: &slov1alpha1.NodeMetric{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-1", + }, + Spec: slov1alpha1.NodeMetricSpec{ + CollectPolicy: &slov1alpha1.NodeMetricCollectPolicy{ + ReportIntervalSeconds: pointer.Int64(60), + }, + }, + Status: slov1alpha1.NodeMetricStatus{ + UpdateTime: &metav1.Time{ + Time: time.Now(), + }, + NodeMetric: &slov1alpha1.NodeMetricInfo{ + NodeUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("53"), + corev1.ResourceMemory: resource.MustParse("300Gi"), + }, + }, + }, + PodsMetric: []*slov1alpha1.PodMetricInfo{ + { + Namespace: "default", + Name: "prod-pod-1", + PodUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("30"), + corev1.ResourceMemory: resource.MustParse("100Gi"), + }, + }, + }, + { + Namespace: "default", + Name: "prod-pod-2", + PodUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("23"), + corev1.ResourceMemory: resource.MustParse("200Gi"), + }, + }, + }, + }, + }, + }, + pods: []*corev1.Pod{ + schedulertesting.MakePod().Namespace("default").Name("prod-pod-1").Priority(extension.PriorityProdValueMax).Obj(), + schedulertesting.MakePod().Namespace("default").Name("prod-pod-2").Priority(extension.PriorityProdValueMax).Obj(), + }, + testPod: schedulertesting.MakePod().Namespace("default").Name("prod-pod-3").Req(map[corev1.ResourceName]string{corev1.ResourceCPU: "1", corev1.ResourceMemory: "165Gi"}).Priority(extension.PriorityProdValueMax).Obj(), + wantStatus: framework.NewStatus(framework.Unschedulable, fmt.Sprintf(ErrReasonUsageExceedThreshold, corev1.ResourceMemory)), + }, + { + name: "filter mid memory usage with new pod request configuration", + nodeName: "test-node-1", + usageThresholds: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 65, + corev1.ResourceMemory: 85, + }, + prodUsageThresholds: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 60, + corev1.ResourceMemory: 80, + }, + nodeMetric: &slov1alpha1.NodeMetric{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-1", + }, + Spec: slov1alpha1.NodeMetricSpec{ + CollectPolicy: &slov1alpha1.NodeMetricCollectPolicy{ + ReportIntervalSeconds: pointer.Int64(60), + }, + }, + Status: slov1alpha1.NodeMetricStatus{ + UpdateTime: &metav1.Time{ + Time: time.Now(), + }, + NodeMetric: &slov1alpha1.NodeMetricInfo{ + NodeUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("53"), + corev1.ResourceMemory: resource.MustParse("300Gi"), + }, + }, + }, + PodsMetric: []*slov1alpha1.PodMetricInfo{ + { + Namespace: "default", + Name: "prod-pod-1", + PodUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("30"), + corev1.ResourceMemory: resource.MustParse("100Gi"), + }, + }, + }, + { + Namespace: "default", + Name: "prod-pod-2", + PodUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("23"), + corev1.ResourceMemory: resource.MustParse("200Gi"), + }, + }, + }, + }, + }, + }, + pods: []*corev1.Pod{ + schedulertesting.MakePod().Namespace("default").Name("prod-pod-1").Priority(extension.PriorityProdValueMax).Obj(), + schedulertesting.MakePod().Namespace("default").Name("prod-pod-2").Priority(extension.PriorityProdValueMax).Obj(), + }, + testPod: schedulertesting.MakePod().Namespace("default").Name("prod-pod-3").Req(map[corev1.ResourceName]string{extension.MidCPU: "1k", extension.MidMemory: "200Gi"}).Priority(extension.PriorityMidValueMax).Obj(), + wantStatus: framework.NewStatus(framework.Unschedulable, fmt.Sprintf(ErrReasonUsageExceedThreshold, corev1.ResourceMemory)), + }, + { + name: "filter prod memory usage with assignedCache pod", + nodeName: "test-node-1", + usageThresholds: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 65, + corev1.ResourceMemory: 85, + }, + prodUsageThresholds: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 60, + corev1.ResourceMemory: 70, + }, + nodeMetric: &slov1alpha1.NodeMetric{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-1", + }, + Spec: slov1alpha1.NodeMetricSpec{ + CollectPolicy: &slov1alpha1.NodeMetricCollectPolicy{ + ReportIntervalSeconds: pointer.Int64(60), + }, + }, + Status: slov1alpha1.NodeMetricStatus{ + UpdateTime: &metav1.Time{ + Time: time.Now(), + }, + NodeMetric: &slov1alpha1.NodeMetricInfo{ + NodeUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("53"), + corev1.ResourceMemory: resource.MustParse("300Gi"), + }, + }, + }, + PodsMetric: []*slov1alpha1.PodMetricInfo{ + { + Namespace: "default", + Name: "prod-pod-1", + PodUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("30"), + corev1.ResourceMemory: resource.MustParse("100Gi"), + }, + }, + }, + }, + }, + }, + pods: []*corev1.Pod{ + schedulertesting.MakePod().Namespace("default").Name("prod-pod-1").Priority(extension.PriorityProdValueMax).Obj(), + schedulertesting.MakePod().Namespace("default").Name("prod-pod-2").Req(map[corev1.ResourceName]string{corev1.ResourceCPU: "1", corev1.ResourceMemory: "200Gi"}).Priority(extension.PriorityProdValueMax).Obj(), + }, + testPod: schedulertesting.MakePod().Namespace("default").Name("prod-pod-3").Req(map[corev1.ResourceName]string{corev1.ResourceCPU: "1", corev1.ResourceMemory: "200Gi"}).Priority(extension.PriorityProdValueMax).Obj(), + wantStatus: framework.NewStatus(framework.Unschedulable, fmt.Sprintf(ErrReasonUsageExceedThreshold, corev1.ResourceMemory)), + }, + { + name: "filter mid cpu usage with assignedCache pod", + nodeName: "test-node-1", + usageThresholds: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 65, + corev1.ResourceMemory: 85, + }, + prodUsageThresholds: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 60, + corev1.ResourceMemory: 70, + }, + nodeMetric: &slov1alpha1.NodeMetric{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-1", + }, + Spec: slov1alpha1.NodeMetricSpec{ + CollectPolicy: &slov1alpha1.NodeMetricCollectPolicy{ + ReportIntervalSeconds: pointer.Int64(60), + }, + }, + Status: slov1alpha1.NodeMetricStatus{ + UpdateTime: &metav1.Time{ + Time: time.Now(), + }, + NodeMetric: &slov1alpha1.NodeMetricInfo{ + NodeUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("33"), + corev1.ResourceMemory: resource.MustParse("110Gi"), + }, + }, + }, + PodsMetric: []*slov1alpha1.PodMetricInfo{ + { + Namespace: "default", + Name: "prod-pod-1", + PodUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("30"), + corev1.ResourceMemory: resource.MustParse("100Gi"), + }, + }, + }, + }, + }, + }, + pods: []*corev1.Pod{ + schedulertesting.MakePod().Namespace("default").Name("prod-pod-1").Priority(extension.PriorityProdValueMax).Obj(), + schedulertesting.MakePod().Namespace("default").Name("mid-pod-2").Req(map[corev1.ResourceName]string{extension.MidCPU: "20k", extension.MidMemory: "200Gi"}).Priority(extension.PriorityMidValueMax).Obj(), + }, + testPod: schedulertesting.MakePod().Namespace("default").Name("mid-pod-3").Req(map[corev1.ResourceName]string{extension.MidCPU: "20k", extension.MidMemory: "200Gi"}).Priority(extension.PriorityMidValueMax).Obj(), + wantStatus: framework.NewStatus(framework.Unschedulable, fmt.Sprintf(ErrReasonUsageExceedThreshold, corev1.ResourceCPU)), + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -903,6 +1262,19 @@ func TestFilterUsage(t *testing.T) { if testPod == nil { testPod = &corev1.Pod{} } + assignCache := p.(*Plugin).podAssignCache + for _, v := range tt.pods { + m := assignCache.podInfoItems[tt.nodeName] + if m == nil { + m = map[types.UID]*podAssignInfo{} + assignCache.podInfoItems[tt.nodeName] = m + } + podUid := uuid.NewUUID() + m[podUid] = &podAssignInfo{ + timestamp: time.Now().Add(-10 * time.Second), + pod: v, + } + } status := p.(*Plugin).Filter(context.TODO(), cycleState, testPod, nodeInfo) assert.True(t, tt.wantStatus.Equal(status), "want status: %s, but got %s", tt.wantStatus.Message(), status.Message()) @@ -938,6 +1310,11 @@ func TestScore(t *testing.T) { UpdateTime: &metav1.Time{ Time: time.Now().Add(-180 * time.Second), }, + NodeMetric: &slov1alpha1.NodeMetricInfo{ + NodeUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{}, + }, + }, }, }, wantScore: 0, @@ -982,6 +1359,11 @@ func TestScore(t *testing.T) { UpdateTime: &metav1.Time{ Time: time.Now(), }, + NodeMetric: &slov1alpha1.NodeMetricInfo{ + NodeUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{}, + }, + }, }, }, wantScore: 90, @@ -1579,6 +1961,11 @@ func TestScore(t *testing.T) { UpdateTime: &metav1.Time{ Time: time.Now(), }, + NodeMetric: &slov1alpha1.NodeMetricInfo{ + NodeUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{}, + }, + }, }, }, wantScore: 90, @@ -1655,6 +2042,11 @@ func TestScore(t *testing.T) { UpdateTime: &metav1.Time{ Time: time.Now(), }, + NodeMetric: &slov1alpha1.NodeMetricInfo{ + NodeUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{}, + }, + }, PodsMetric: []*slov1alpha1.PodMetricInfo{ { Namespace: "default", @@ -1711,6 +2103,11 @@ func TestScore(t *testing.T) { UpdateTime: &metav1.Time{ Time: time.Now(), }, + NodeMetric: &slov1alpha1.NodeMetricInfo{ + NodeUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{}, + }, + }, }, }, wantScore: 88, @@ -1745,6 +2142,11 @@ func TestScore(t *testing.T) { UpdateTime: &metav1.Time{ Time: time.Now(), }, + NodeMetric: &slov1alpha1.NodeMetricInfo{ + NodeUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{}, + }, + }, }, }, wantScore: 99,