scheduler: support multi-GPU share
Signed-off-by: machao <986292120@qq.com>
AdrianMachao committed Jul 10, 2024
1 parent 448015c commit 7e3031b
Showing 11 changed files with 1,040 additions and 20 deletions.
24 changes: 15 additions & 9 deletions pkg/scheduler/plugins/deviceshare/devicehandler_gpu.go
@@ -51,17 +51,23 @@ func (h *GPUHandler) CalcDesiredRequestsAndCount(node *corev1.Node, pod *corev1.
requests := podRequests
desiredCount := int64(1)

- memoryRatio := podRequests[apiext.ResourceGPUMemoryRatio]
- multiDevices := memoryRatio.Value() > 100 && memoryRatio.Value()%100 == 0
- if multiDevices {
- gpuCore, gpuMem, gpuMemoryRatio := podRequests[apiext.ResourceGPUCore], podRequests[apiext.ResourceGPUMemory], podRequests[apiext.ResourceGPUMemoryRatio]
- desiredCount = gpuMemoryRatio.Value() / 100
- requests = corev1.ResourceList{
- apiext.ResourceGPUCore: *resource.NewQuantity(gpuCore.Value()/desiredCount, resource.DecimalSI),
- apiext.ResourceGPUMemory: *resource.NewQuantity(gpuMem.Value()/desiredCount, resource.BinarySI),
- apiext.ResourceGPUMemoryRatio: *resource.NewQuantity(gpuMemoryRatio.Value()/desiredCount, resource.DecimalSI),
- }
- }
+ gpuShare, ok := podRequests[apiext.ResourceGPUShared]
+ gpuCore, gpuMem, gpuMemoryRatio := podRequests[apiext.ResourceGPUCore], podRequests[apiext.ResourceGPUMemory], podRequests[apiext.ResourceGPUMemoryRatio]
+ // GPU share mode: the pod explicitly declares how many shared GPU instances it wants.
+ if ok && gpuShare.Value() > 0 {
+ desiredCount = gpuShare.Value()
+ } else if gpuMemoryRatio.Value() > 100 && gpuMemoryRatio.Value()%100 == 0 {
+ // Otherwise infer the device count from a memory ratio spanning multiple whole GPUs.
+ desiredCount = gpuMemoryRatio.Value() / 100
+ }
+
+ requests = corev1.ResourceList{
+ apiext.ResourceGPUCore: *resource.NewQuantity(gpuCore.Value()/desiredCount, resource.DecimalSI),
+ apiext.ResourceGPUMemory: *resource.NewQuantity(gpuMem.Value()/desiredCount, resource.BinarySI),
+ apiext.ResourceGPUMemoryRatio: *resource.NewQuantity(gpuMemoryRatio.Value()/desiredCount, resource.DecimalSI),
+ }

return requests, int(desiredCount), nil
}

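Editor's note: to illustrate the branch above, here is a minimal, self-contained sketch of the per-device split, assuming a hypothetical pod that requests 4 shared GPUs and 160Gi of total GPU memory (values invented for illustration; the real handler operates on the apiext resource names).

// Sketch only, not the plugin code. Mirrors the share-mode split above
// under an assumed request of 4 shared GPUs and 160Gi total GPU memory.
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	gpuShare := resource.MustParse("4")   // hypothetical gpu-shared request
	gpuMem := resource.MustParse("160Gi") // hypothetical gpu-memory request

	// Share mode: one allocation per requested shared GPU.
	desiredCount := gpuShare.Value()
	perDeviceMem := resource.NewQuantity(gpuMem.Value()/desiredCount, resource.BinarySI)

	fmt.Printf("desiredCount=%d perDeviceMemory=%s\n", desiredCount, perDeviceMem.String()) // 4 40Gi
}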
216 changes: 216 additions & 0 deletions pkg/scheduler/plugins/deviceshare/plugin_test.go
@@ -1848,6 +1848,221 @@ func Test_Plugin_Filter(t *testing.T) {
nodeInfo: testNodeInfo,
want: nil,
},
{
name: "pod stuck when use multi gpu",
state: &preFilterState{
skip: false,
podRequests: map[schedulingv1alpha1.DeviceType]corev1.ResourceList{
schedulingv1alpha1.GPU: {
apiext.ResourceGPUShared: resource.MustParse("4"),
apiext.ResourceGPUMemory: resource.MustParse("160G"),
},
},
},
nodeDeviceCache: &nodeDeviceCache{
nodeDeviceInfos: map[string]*nodeDevice{
"test-node": {
allocateSet: map[schedulingv1alpha1.DeviceType]map[types.NamespacedName]deviceResources{},
deviceFree: map[schedulingv1alpha1.DeviceType]deviceResources{
schedulingv1alpha1.GPU: {
0: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
1: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
2: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
3: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
4: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
5: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
6: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
7: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
8: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
},
},
deviceTotal: map[schedulingv1alpha1.DeviceType]deviceResources{
schedulingv1alpha1.GPU: {
0: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
1: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
2: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
3: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
4: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
5: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
6: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
7: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
8: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
},
},
deviceUsed: map[schedulingv1alpha1.DeviceType]deviceResources{},
vfAllocations: map[schedulingv1alpha1.DeviceType]*VFAllocation{},
numaTopology: &NUMATopology{},
deviceInfos: map[schedulingv1alpha1.DeviceType][]*schedulingv1alpha1.DeviceInfo{
schedulingv1alpha1.GPU: {
{
Type: schedulingv1alpha1.GPU,
Health: true,
UUID: "123456-0",
Minor: pointer.Int32(0),
Resources: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
},
{
Type: schedulingv1alpha1.GPU,
Health: true,
UUID: "123456-1",
Minor: pointer.Int32(1),
Resources: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
},
{
Type: schedulingv1alpha1.GPU,
Health: true,
UUID: "123456-2",
Minor: pointer.Int32(2),
Resources: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
},
{
Type: schedulingv1alpha1.GPU,
Health: true,
UUID: "123456-3",
Minor: pointer.Int32(3),
Resources: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
},
{
Type: schedulingv1alpha1.GPU,
Health: true,
UUID: "123456-4",
Minor: pointer.Int32(4),
Resources: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
},
{
Type: schedulingv1alpha1.GPU,
Health: true,
UUID: "123456-5",
Minor: pointer.Int32(5),
Resources: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
},
{
Type: schedulingv1alpha1.GPU,
Health: true,
UUID: "123456-6",
Minor: pointer.Int32(6),
Resources: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
},
{
Type: schedulingv1alpha1.GPU,
Health: true,
UUID: "123456-7",
Minor: pointer.Int32(7),
Resources: corev1.ResourceList{
apiext.ResourceGPUCore: resource.MustParse("100"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
apiext.ResourceGPUMemory: resource.MustParse("80Gi"),
},
},
},
},
},
},
},
nodeInfo: testNodeInfo,
want: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
@@ -1910,6 +2125,7 @@ func Test_Plugin_Filter(t *testing.T) {
}
cycleState.Write(reservationRestoreStateKey, restoreState)
}
status := p.Filter(context.TODO(), cycleState, pod, tt.nodeInfo)
assert.Equal(t, tt.want.Code(), status.Code())
assert.True(t, strings.Contains(status.Message(), tt.want.Message()))
51 changes: 42 additions & 9 deletions pkg/scheduler/plugins/deviceshare/utils.go
@@ -38,6 +38,7 @@ const (
NvidiaGPU = 1 << iota
HygonDCU
KoordGPU
GPUShared
GPUCore
GPUMemory
GPUMemoryRatio
@@ -50,6 +51,7 @@ var DeviceResourceNames = map[schedulingv1alpha1.DeviceType][]corev1.ResourceNam
apiext.ResourceNvidiaGPU,
apiext.ResourceHygonDCU,
apiext.ResourceGPU,
apiext.ResourceGPUShared,
apiext.ResourceGPUCore,
apiext.ResourceGPUMemory,
apiext.ResourceGPUMemoryRatio,
@@ -65,20 +67,25 @@ var DeviceResourceFlags = map[corev1.ResourceName]uint{
apiext.ResourceGPUCore: GPUCore,
apiext.ResourceGPUMemory: GPUMemory,
apiext.ResourceGPUMemoryRatio: GPUMemoryRatio,
apiext.ResourceGPUShared: GPUShared,
apiext.ResourceFPGA: FPGA,
apiext.ResourceRDMA: RDMA,
}

var ValidDeviceResourceCombinations = map[uint]bool{
NvidiaGPU: true,
HygonDCU: true,
KoordGPU: true,
GPUMemory: true,
GPUMemoryRatio: true,
GPUCore | GPUMemory: true,
GPUCore | GPUMemoryRatio: true,
+ GPUShared | GPUMemory: true,
+ GPUShared | GPUMemoryRatio: true,
+ GPUShared | GPUCore | GPUMemory: true,
+ GPUShared | GPUCore | GPUMemoryRatio: true,
FPGA: true,
RDMA: true,
}
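Editor's note: the validation itself is a bitmask lookup. Each device resource name contributes one flag bit, the bits of all requested names are OR-ed together, and the result must be a key of ValidDeviceResourceCombinations. A minimal sketch under the flag values implied by the const block above:

// Sketch of the combination check; flag values mirror the iota order above.
package main

import "fmt"

const (
	NvidiaGPU = 1 << iota
	HygonDCU
	KoordGPU
	GPUShared
	GPUCore
	GPUMemory
	GPUMemoryRatio
)

var valid = map[uint]bool{
	GPUShared | GPUMemoryRatio: true, // e.g. gpu-shared plus gpu-memory-ratio
}

func main() {
	// A hypothetical pod requesting gpu-shared and gpu-memory-ratio.
	var combination uint = GPUShared | GPUMemoryRatio
	fmt.Println(valid[combination])           // true
	fmt.Println(valid[combination|NvidiaGPU]) // false: mixing in nvidia.com/gpu is rejected
}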

var DeviceResourceValidators = map[corev1.ResourceName]func(q resource.Quantity) bool{
@@ -118,6 +125,32 @@ var ResourceCombinationsMapper = map[uint]func(podRequest corev1.ResourceList) c
apiext.ResourceGPUMemoryRatio: podRequest[apiext.ResourceGPU],
}
},
GPUShared | GPUMemory: func(podRequest corev1.ResourceList) corev1.ResourceList {
return corev1.ResourceList{
apiext.ResourceGPUShared: podRequest[apiext.ResourceGPUShared],
apiext.ResourceGPUMemory: podRequest[apiext.ResourceGPUMemory],
}
},
GPUShared | GPUMemoryRatio: func(podRequest corev1.ResourceList) corev1.ResourceList {
return corev1.ResourceList{
apiext.ResourceGPUShared: podRequest[apiext.ResourceGPUShared],
apiext.ResourceGPUMemoryRatio: podRequest[apiext.ResourceGPUMemoryRatio],
}
},
GPUShared | GPUCore | GPUMemory: func(podRequest corev1.ResourceList) corev1.ResourceList {
return corev1.ResourceList{
apiext.ResourceGPUShared: podRequest[apiext.ResourceGPUShared],
apiext.ResourceGPUCore: podRequest[apiext.ResourceGPUCore],
apiext.ResourceGPUMemory: podRequest[apiext.ResourceGPUMemory],
}
},
GPUShared | GPUCore | GPUMemoryRatio: func(podRequest corev1.ResourceList) corev1.ResourceList {
return corev1.ResourceList{
apiext.ResourceGPUShared: podRequest[apiext.ResourceGPUShared],
apiext.ResourceGPUCore: podRequest[apiext.ResourceGPUCore],
apiext.ResourceGPUMemoryRatio: podRequest[apiext.ResourceGPUMemoryRatio],
}
},
NvidiaGPU: func(podRequest corev1.ResourceList) corev1.ResourceList {
nvidiaGPU := podRequest[apiext.ResourceNvidiaGPU]
return corev1.ResourceList{
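Editor's note: once a combination is validated, the matching mapper normalizes the raw pod request into the canonical resource list the allocator consumes. A hedged usage sketch for the GPUShared | GPUMemoryRatio entry (after the ResourceGPUMemoryRatio fix above); the resource-name strings below are assumptions mirroring the apiext constants, not verified values:

// Sketch only; the resource-name strings are assumed, not the apiext source.
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

const (
	resourceGPUShared      corev1.ResourceName = "koordinator.sh/gpu-shared"       // assumed name
	resourceGPUMemoryRatio corev1.ResourceName = "koordinator.sh/gpu-memory-ratio" // assumed name
)

func main() {
	podRequest := corev1.ResourceList{
		resourceGPUShared:      resource.MustParse("2"),
		resourceGPUMemoryRatio: resource.MustParse("200"), // two whole GPUs' worth of memory
	}
	// What the GPUShared|GPUMemoryRatio mapper would return.
	normalized := corev1.ResourceList{
		resourceGPUShared:      podRequest[resourceGPUShared],
		resourceGPUMemoryRatio: podRequest[resourceGPUMemoryRatio],
	}
	shared := normalized[resourceGPUShared]
	ratio := normalized[resourceGPUMemoryRatio]
	fmt.Println(shared.Value(), ratio.Value()) // 2 200
}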
@@ -161,14 +161,17 @@ func (p *Plugin) calculate(node *corev1.Node, device *schedulingv1alpha1.Device)
// calculate gpu resources
gpuResources := make(corev1.ResourceList)
totalKoordGPU := resource.NewQuantity(0, resource.DecimalSI)
healthGPUNum := 0
for _, d := range device.Spec.Devices {
if d.Type != schedulingv1alpha1.GPU || !d.Health {
continue
}
healthGPUNum++
util.AddResourceList(gpuResources, d.Resources)
totalKoordGPU.Add(d.Resources[extension.ResourceGPUCore])
}
gpuResources[extension.ResourceGPU] = *totalKoordGPU
gpuResources[extension.ResourceGPUShared] = *resource.NewQuantity(int64(healthGPUNum)*100, resource.DecimalSI) // advertise 100 gpu-shared units per healthy GPU
var items []framework.ResourceItem
// FIXME: shall we add node resources in devices but not in ResourceNames?
for resourceName := range gpuResources {
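Editor's note: a worked example of the aggregation above. Assuming a node with 8 healthy GPUs of 100 gpu-core each (an invented node, for illustration), the summed GPU total comes to 800 and the advertised gpu-shared capacity is 8 × 100 = 800, following the healthGPUNum scaling on the line above:

// Sketch of the node-level totals; the device count and per-device values are assumed.
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	healthGPUNum := 0
	totalKoordGPU := resource.NewQuantity(0, resource.DecimalSI)
	for i := 0; i < 8; i++ { // 8 hypothetical healthy GPUs
		healthGPUNum++
		totalKoordGPU.Add(*resource.NewQuantity(100, resource.DecimalSI)) // gpu-core per device
	}
	gpuShared := resource.NewQuantity(int64(healthGPUNum)*100, resource.DecimalSI)
	fmt.Println(totalKoordGPU.Value(), gpuShared.Value()) // 800 800
}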
