scheduler:support multi gpu share
Signed-off-by: machao <986292120@qq.com>
AdrianMachao committed Jul 13, 2024
1 parent 6861c87 commit e33c0a9
Showing 14 changed files with 1,947 additions and 901 deletions.
3 changes: 2 additions & 1 deletion apis/configuration/slo_controller_config.go
@@ -251,7 +251,8 @@ type ColocationStrategy struct {
 	// MidMemoryThresholdPercent defines the maximum percentage of the Mid-tier memory resource dividing the node allocatable.
 	// MidMemoryAllocatable <= NodeMemoryAllocatable * MidMemoryThresholdPercent / 100.
 	MidMemoryThresholdPercent *int64 `json:"midMemoryThresholdPercent,omitempty" validate:"omitempty,min=0,max=100"`
-
+	// UseDeviceGPUSharedResource determines whether to use the GPU shared resource reported by the device or the one calculated by the slo-controller noderesource plugin
+	UseDeviceGPUSharedResource *bool `json:"useDeviceGPUSharedResource,omitempty"`
 	ColocationStrategyExtender `json:",inline"` // for third-party extension
 }

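For context, a minimal sketch of how the new toggle sits next to the existing threshold field. The trimmed-down struct and the wiring in main are illustrative stand-ins, not the repository's actual configuration plumbing:

package main

import "fmt"

// Simplified stand-in for apis/configuration.ColocationStrategy, keeping only
// the two fields shown in this diff; the real type carries many more fields.
type ColocationStrategy struct {
	MidMemoryThresholdPercent  *int64
	UseDeviceGPUSharedResource *bool
}

func main() {
	threshold := int64(80)
	useDevice := true
	strategy := ColocationStrategy{
		MidMemoryThresholdPercent:  &threshold, // MidMemoryAllocatable <= NodeMemoryAllocatable * 80 / 100
		UseDeviceGPUSharedResource: &useDevice, // prefer the shared resource reported by the device
	}
	fmt.Println(*strategy.MidMemoryThresholdPercent, *strategy.UseDeviceGPUSharedResource)
}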
1 change: 1 addition & 0 deletions pkg/scheduler/plugins/deviceshare/device_allocator.go
@@ -163,6 +163,7 @@ func (a *AutopilotAllocator) calcRequestsAndCountByDeviceType(
 ) (map[schedulingv1alpha1.DeviceType]corev1.ResourceList, map[schedulingv1alpha1.DeviceType]int, *framework.Status) {
 	requestPerInstance := map[schedulingv1alpha1.DeviceType]corev1.ResourceList{}
 	desiredCountPerDeviceType := map[schedulingv1alpha1.DeviceType]int{}
+	fmt.Println("podRequest:", podRequests)
 	for deviceType, requests := range podRequests {
 		if quotav1.IsZero(requests) {
 			continue
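The podRequests value printed above is a per-device-type map of resource lists. A rough sketch of its shape under a GPU-share request follows; the types are simplified stand-ins for schedulingv1alpha1.DeviceType and corev1.ResourceList, and the resource-name strings are assumptions modeled on the apiext constants used elsewhere in this diff:

package main

import "fmt"

// Simplified stand-ins for the scheduler's real types.
type DeviceType string
type ResourceList map[string]int64

func main() {
	// Assumed resource names, mirroring apiext.ResourceGPUShared and friends.
	podRequests := map[DeviceType]ResourceList{
		"gpu": {
			"koordinator.sh/gpu-shared":       2,   // spread across 2 GPU devices
			"koordinator.sh/gpu-core":         200, // 100 per device after the split
			"koordinator.sh/gpu-memory-ratio": 200,
		},
	}
	requestPerInstance := map[DeviceType]ResourceList{}
	desiredCountPerDeviceType := map[DeviceType]int{}
	for deviceType, requests := range podRequests {
		// The real allocator divides requests by the desired device count;
		// here we only record them to show the output shape.
		requestPerInstance[deviceType] = requests
		desiredCountPerDeviceType[deviceType] = 2
	}
	fmt.Println(requestPerInstance, desiredCountPerDeviceType)
}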
21 changes: 16 additions & 5 deletions pkg/scheduler/plugins/deviceshare/devicehandler_gpu.go
@@ -43,6 +43,8 @@ func (h *GPUHandler) CalcDesiredRequestsAndCount(node *corev1.Node, pod *corev1.
 		return nil, 0, framework.NewStatus(framework.UnschedulableAndUnresolvable, fmt.Sprintf("Insufficient %s devices", schedulingv1alpha1.GPU))
 	}

+	fmt.Println("---podRequests:", podRequests)
+
 	podRequests = podRequests.DeepCopy()
 	if err := fillGPUTotalMem(totalDevice, podRequests); err != nil {
 		return nil, 0, framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error())
@@ -51,17 +53,26 @@
 	requests := podRequests
 	desiredCount := int64(1)

-	memoryRatio := podRequests[apiext.ResourceGPUMemoryRatio]
-	multiDevices := memoryRatio.Value() > 100 && memoryRatio.Value()%100 == 0
-	if multiDevices {
-		gpuCore, gpuMem, gpuMemoryRatio := podRequests[apiext.ResourceGPUCore], podRequests[apiext.ResourceGPUMemory], podRequests[apiext.ResourceGPUMemoryRatio]
-		desiredCount = gpuMemoryRatio.Value() / 100
+	gpuShare, ok := podRequests[apiext.ResourceGPUShared]
+	gpuCore, gpuMem, gpuMemoryRatio := podRequests[apiext.ResourceGPUCore], podRequests[apiext.ResourceGPUMemory], podRequests[apiext.ResourceGPUMemoryRatio]
+	// gpu share mode
+	if ok && gpuShare.Value() > 0 {
+		desiredCount = gpuShare.Value()
+	} else {
+		if gpuMemoryRatio.Value() > 100 && gpuMemoryRatio.Value()%100 == 0 {
+			desiredCount = gpuMemoryRatio.Value() / 100
+		}
+	}
+
+	if desiredCount > 1 {
 		requests = corev1.ResourceList{
 			apiext.ResourceGPUCore:        *resource.NewQuantity(gpuCore.Value()/desiredCount, resource.DecimalSI),
 			apiext.ResourceGPUMemory:      *resource.NewQuantity(gpuMem.Value()/desiredCount, resource.BinarySI),
 			apiext.ResourceGPUMemoryRatio: *resource.NewQuantity(gpuMemoryRatio.Value()/desiredCount, resource.DecimalSI),
 		}
 	}
+
+	fmt.Println("request:", requests)
 	return requests, int(desiredCount), nil
 }
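The new branch gives the gpu-shared request precedence over the old memory-ratio heuristic, then splits the total request evenly across the chosen device count. A self-contained sketch of that arithmetic, with plain int64 values standing in for resource.Quantity and an invented function name for illustration:

package main

import "fmt"

// splitGPURequest mirrors the logic in CalcDesiredRequestsAndCount above:
// gpu-shared, when set, dictates the device count; otherwise a memory ratio
// that is a multiple of 100 implies one device per 100.
func splitGPURequest(gpuShare, gpuCore, gpuMem, gpuMemoryRatio int64) ([3]int64, int64) {
	desiredCount := int64(1)
	if gpuShare > 0 { // gpu share mode
		desiredCount = gpuShare
	} else if gpuMemoryRatio > 100 && gpuMemoryRatio%100 == 0 {
		desiredCount = gpuMemoryRatio / 100
	}
	perDevice := [3]int64{gpuCore, gpuMem, gpuMemoryRatio}
	if desiredCount > 1 {
		// Divide the aggregate request evenly across the devices.
		perDevice = [3]int64{gpuCore / desiredCount, gpuMem / desiredCount, gpuMemoryRatio / desiredCount}
	}
	return perDevice, desiredCount
}

func main() {
	// e.g. gpu-shared=2 with 200% core/ratio and 16Gi memory:
	// each device gets 100% core, 8Gi memory, 100% ratio.
	perDevice, count := splitGPURequest(2, 200, 16<<30, 200)
	fmt.Println(perDevice, count) // [100 8589934592 100] 2
}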
3 changes: 3 additions & 0 deletions pkg/scheduler/plugins/deviceshare/plugin.go
@@ -416,6 +416,7 @@ func (p *Plugin) Reserve(ctx context.Context, cycleState *framework.CycleState,
 		affinity = store.GetAffinity(nodeInfo.Node().Name)
 	}

+	fmt.Println("---state", state)
 	allocator := &AutopilotAllocator{
 		state:      state,
 		nodeDevice: nodeDeviceInfo,
@@ -438,7 +439,9 @@ func (p *Plugin) Reserve(ctx context.Context, cycleState *framework.CycleState,
 		return status
 	}
 	if len(result) == 0 {
+		fmt.Println("---allocated with nominated is null")
 		preemptible = appendAllocated(preemptible, restoreState.mergedMatchedAllocatable)
+		fmt.Println("-----preemptible", preemptible)
 		result, status = allocator.Allocate(nil, nil, nil, preemptible)
 		if !status.IsSuccess() {
 			return status
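The Reserve change adds tracing around an existing two-pass strategy: first try to allocate against the resources tied to the nominated reservation, and only if that yields an empty result, fold the restored matched allocatable into preemptible and retry. A minimal model of that control flow; the types, values, and merge step are simplified placeholders for the plugin's real AutopilotAllocator and appendAllocated:

package main

import "fmt"

type resources map[string]int64

// allocate stands in for AutopilotAllocator.Allocate: it returns an empty
// result (with an implied success status) when capacity is insufficient,
// mirroring the len(result) == 0 check in Reserve.
func allocate(request int64, preemptible resources) resources {
	if preemptible["koordinator.sh/gpu-shared"] >= request {
		return resources{"koordinator.sh/gpu-shared": request}
	}
	return resources{}
}

func main() {
	request := int64(2)
	preemptible := resources{} // nothing reclaimable seen yet

	result := allocate(request, preemptible) // first pass
	if len(result) == 0 {
		// Second pass: merge the restored matched allocatable, as
		// appendAllocated does in the real plugin (values invented here).
		preemptible["koordinator.sh/gpu-shared"] += 2
		result = allocate(request, preemptible)
	}
	fmt.Println(result) // map[koordinator.sh/gpu-shared:2]
}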