From 2555cab054ce6f43ecd4961259f3cf888de77007 Mon Sep 17 00:00:00 2001 From: "wangjianyu.wjy" Date: Tue, 28 May 2024 10:52:54 +0800 Subject: [PATCH] manager: only sync label when label key exists Signed-off-by: wangjianyu.wjy --- .../plugins/gpudeviceresource/plugin.go | 16 ++- .../plugins/gpudeviceresource/plugin_test.go | 130 ++++++++++++++++++ 2 files changed, 140 insertions(+), 6 deletions(-) diff --git a/pkg/slo-controller/noderesource/plugins/gpudeviceresource/plugin.go b/pkg/slo-controller/noderesource/plugins/gpudeviceresource/plugin.go index 116e977ab..535e9c308 100644 --- a/pkg/slo-controller/noderesource/plugins/gpudeviceresource/plugin.go +++ b/pkg/slo-controller/noderesource/plugins/gpudeviceresource/plugin.go @@ -183,13 +183,17 @@ func (p *Plugin) calculate(node *corev1.Node, device *schedulingv1alpha1.Device) klog.V(5).InfoS("calculate gpu resources", "node", node.Name, "resources", gpuResources) // calculate labels about gpu driver and model - if device.Labels != nil { + updatedLabels := map[string]string{} + if gpuModel, ok := device.Labels[extension.LabelGPUModel]; ok { + updatedLabels[extension.LabelGPUModel] = gpuModel + } + if gpuDriverVersion, ok := device.Labels[extension.LabelGPUDriverVersion]; ok { + updatedLabels[extension.LabelGPUDriverVersion] = gpuDriverVersion + } + if len(updatedLabels) != 0 { items = append(items, framework.ResourceItem{ - Name: PluginName, - Labels: map[string]string{ - extension.LabelGPUModel: device.Labels[extension.LabelGPUModel], - extension.LabelGPUDriverVersion: device.Labels[extension.LabelGPUDriverVersion], - }, + Name: PluginName, + Labels: updatedLabels, Message: UpdateLabelsMsg, }) } diff --git a/pkg/slo-controller/noderesource/plugins/gpudeviceresource/plugin_test.go b/pkg/slo-controller/noderesource/plugins/gpudeviceresource/plugin_test.go index e966ea1fc..5d9573c52 100644 --- a/pkg/slo-controller/noderesource/plugins/gpudeviceresource/plugin_test.go +++ b/pkg/slo-controller/noderesource/plugins/gpudeviceresource/plugin_test.go @@ -536,6 +536,72 @@ func TestPluginCalculate(t *testing.T) { }, }, } + deviceMissingLabels := &schedulingv1alpha1.Device{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNode.Name, + Labels: nil, + }, + Spec: schedulingv1alpha1.DeviceSpec{ + Devices: []schedulingv1alpha1.DeviceInfo{ + { + UUID: "1", + Minor: pointer.Int32(0), + Health: true, + Type: schedulingv1alpha1.GPU, + Resources: map[corev1.ResourceName]resource.Quantity{ + extension.ResourceGPUCore: *resource.NewQuantity(100, resource.DecimalSI), + extension.ResourceGPUMemory: *resource.NewQuantity(8000, resource.DecimalSI), + extension.ResourceGPUMemoryRatio: *resource.NewQuantity(100, resource.DecimalSI), + }, + }, + { + UUID: "2", + Minor: pointer.Int32(1), + Health: true, + Type: schedulingv1alpha1.GPU, + Resources: map[corev1.ResourceName]resource.Quantity{ + extension.ResourceGPUCore: *resource.NewQuantity(100, resource.DecimalSI), + extension.ResourceGPUMemory: *resource.NewQuantity(10000, resource.DecimalSI), + extension.ResourceGPUMemoryRatio: *resource.NewQuantity(100, resource.DecimalSI), + }, + }, + }, + }, + } + deviceMissingGPURelatedLabels := &schedulingv1alpha1.Device{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNode.Name, + Labels: map[string]string{ + "anything": "anything", + }, + }, + Spec: schedulingv1alpha1.DeviceSpec{ + Devices: []schedulingv1alpha1.DeviceInfo{ + { + UUID: "1", + Minor: pointer.Int32(0), + Health: true, + Type: schedulingv1alpha1.GPU, + Resources: map[corev1.ResourceName]resource.Quantity{ + extension.ResourceGPUCore: *resource.NewQuantity(100, resource.DecimalSI), + extension.ResourceGPUMemory: *resource.NewQuantity(8000, resource.DecimalSI), + extension.ResourceGPUMemoryRatio: *resource.NewQuantity(100, resource.DecimalSI), + }, + }, + { + UUID: "2", + Minor: pointer.Int32(1), + Health: true, + Type: schedulingv1alpha1.GPU, + Resources: map[corev1.ResourceName]resource.Quantity{ + extension.ResourceGPUCore: *resource.NewQuantity(100, resource.DecimalSI), + extension.ResourceGPUMemory: *resource.NewQuantity(10000, resource.DecimalSI), + extension.ResourceGPUMemoryRatio: *resource.NewQuantity(100, resource.DecimalSI), + }, + }, + }, + }, + } type fields struct { client ctrlclient.Client } @@ -651,6 +717,70 @@ func TestPluginCalculate(t *testing.T) { }, wantErr: false, }, + { + name: "calculate device resources correctly", + fields: fields{ + client: fake.NewClientBuilder().WithScheme(testScheme).WithObjects(testNode, deviceMissingLabels).Build(), + }, + args: args{ + node: testNode, + }, + want: []framework.ResourceItem{ + { + Name: extension.ResourceGPU, + Quantity: resource.NewQuantity(200, resource.DecimalSI), + Message: UpdateResourcesMsg, + }, + { + Name: extension.ResourceGPUCore, + Quantity: resource.NewQuantity(200, resource.DecimalSI), + Message: UpdateResourcesMsg, + }, + { + Name: extension.ResourceGPUMemory, + Quantity: resource.NewScaledQuantity(18, 3), + Message: UpdateResourcesMsg, + }, + { + Name: extension.ResourceGPUMemoryRatio, + Quantity: resource.NewQuantity(200, resource.DecimalSI), + Message: UpdateResourcesMsg, + }, + }, + wantErr: false, + }, + { + name: "calculate device resources correctly", + fields: fields{ + client: fake.NewClientBuilder().WithScheme(testScheme).WithObjects(testNode, deviceMissingGPURelatedLabels).Build(), + }, + args: args{ + node: testNode, + }, + want: []framework.ResourceItem{ + { + Name: extension.ResourceGPU, + Quantity: resource.NewQuantity(200, resource.DecimalSI), + Message: UpdateResourcesMsg, + }, + { + Name: extension.ResourceGPUCore, + Quantity: resource.NewQuantity(200, resource.DecimalSI), + Message: UpdateResourcesMsg, + }, + { + Name: extension.ResourceGPUMemory, + Quantity: resource.NewScaledQuantity(18, 3), + Message: UpdateResourcesMsg, + }, + { + Name: extension.ResourceGPUMemoryRatio, + Quantity: resource.NewQuantity(200, resource.DecimalSI), + Message: UpdateResourcesMsg, + }, + }, + wantErr: false, + }, { name: "calculate resetting device resources", fields: fields{