Skip to content

Commit

Permalink
ensure placeholder pod's limits carry the same amount of limits regar…
Browse files Browse the repository at this point in the history
…dless of resource type
  • Loading branch information
Huang-Wei committed Sep 30, 2023
1 parent 168bfec commit 88f609b
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 68 deletions.
3 changes: 1 addition & 2 deletions pkg/cache/placeholder.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ func newPlaceholder(placeholderName string, app *Application, taskGroup interfac

// prepare the resource lists
requests := utils.GetPlaceholderResourceRequests(taskGroup.MinResource)
limits := utils.GetPlaceholderResourceLimits(taskGroup.MinResource)
placeholderPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: placeholderName,
Expand All @@ -112,7 +111,7 @@ func newPlaceholder(placeholderName string, app *Application, taskGroup interfac
ImagePullPolicy: v1.PullIfNotPresent,
Resources: v1.ResourceRequirements{
Requests: requests,
Limits: limits,
Limits: requests,
},
},
},
Expand Down
4 changes: 2 additions & 2 deletions pkg/cache/placeholder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ func TestNewPlaceholderExtendedResources(t *testing.T) {
app.setTaskGroups(taskGroups)
holder := newPlaceholder("ph-name", app, app.taskGroups[0])
assert.Equal(t, len(holder.pod.Spec.Containers[0].Resources.Requests), 5, "expected requests not found")
assert.Equal(t, len(holder.pod.Spec.Containers[0].Resources.Limits), 2, "limit for extended resource not found")
assert.Equal(t, len(holder.pod.Spec.Containers[0].Resources.Limits), 5, "expected limits not found")
assert.Equal(t, holder.pod.Spec.Containers[0].Resources.Limits[gpu], holder.pod.Spec.Containers[0].Resources.Requests[gpu], "gpu: expected same value for request and limit")
assert.Equal(t, holder.pod.Spec.Containers[0].Resources.Limits[hugepages], holder.pod.Spec.Containers[0].Resources.Requests[hugepages], "hugepages: expected same value for request and limit")
var priority *int32
Expand Down Expand Up @@ -272,7 +272,7 @@ func TestNewPlaceholderWithPriorityClassName(t *testing.T) {

holder := newPlaceholder("ph-name", app, app.taskGroups[0])
assert.Equal(t, len(holder.pod.Spec.Containers[0].Resources.Requests), 5, "expected requests not found")
assert.Equal(t, len(holder.pod.Spec.Containers[0].Resources.Limits), 2, "limit for extended resource not found")
assert.Equal(t, len(holder.pod.Spec.Containers[0].Resources.Limits), 5, "expected limits not found")
assert.Equal(t, holder.pod.Spec.Containers[0].Resources.Limits[gpu], holder.pod.Spec.Containers[0].Resources.Requests[gpu], "gpu: expected same value for request and limit")
assert.Equal(t, holder.pod.Spec.Containers[0].Resources.Limits[hugepages], holder.pod.Spec.Containers[0].Resources.Requests[hugepages], "hugepages: expected same value for request and limit")
var priority *int32
Expand Down
25 changes: 0 additions & 25 deletions pkg/common/utils/gang_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,31 +91,6 @@ func GetPlaceholderResourceRequests(resources map[string]resource.Quantity) v1.R
return resourceReq
}

// GetPlaceholderResourceLimits converts the map of resources requested into a list of resources for the limit
// specification that can be added to a pod. This only adds the minimal required resources that do not support
// over commit. The following resources do support over commit: all standard kubernetes resources (except hugepages*)
// All non-over committable, i.e. extended, resources MUST have a limit set to the same value as the request.
func GetPlaceholderResourceLimits(resources map[string]resource.Quantity) v1.ResourceList {
resourceLim := v1.ResourceList{}
for k, v := range resources {
if k == "" || allowOverCommit(k) {
continue
}
resourceLim[v1.ResourceName(k)] = v
}
return resourceLim
}

// allowOverCommit returns true if the resource can be over committed.
// This comes down to only allow the standard resources cpu, memory and ephemeral-storage to be over committed.
// We deviate from the K8s checks as opaque resources are no longer supported. Opaque resources were the only
// resources in the "kubernetes.io" domain.
// We slip in the "pods" resource on task groups but that does not cause an issue.
func allowOverCommit(name string) bool {
return !strings.Contains(name, "/") &&
!strings.HasPrefix(name, v1.ResourceHugePagesPrefix)
}

func GetSchedulingPolicyParam(pod *v1.Pod) *interfaces.SchedulingPolicyParameters {
timeout := int64(0)
style := constants.SchedulingPolicyStyleParamDefault
Expand Down
39 changes: 0 additions & 39 deletions pkg/common/utils/gang_utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,24 +140,6 @@ func TestGetSchedulingPolicyParams(t *testing.T) {
}
}

func Test_allowOverCommit(t *testing.T) {
tests := []struct {
name string
resName string
want bool
}{
{"standard", "memory", true},
{"pod as resource", "pods", true},
{"hugepages", "hugepages-small", false},
{"extended", "nvidia.com/gpu", false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, allowOverCommit(tt.resName), tt.want, "incorrect overcommit status")
})
}
}

func Test_GetPlaceholderResourceRequest(t *testing.T) {
tests := []struct {
name string
Expand All @@ -178,24 +160,3 @@ func Test_GetPlaceholderResourceRequest(t *testing.T) {
})
}
}

func Test_GetPlaceholderResourceLimits(t *testing.T) {
tests := []struct {
name string
resMap map[string]resource.Quantity
want v1.ResourceList
}{
{"nil", nil, v1.ResourceList{}},
{"empty", map[string]resource.Quantity{}, v1.ResourceList{}},
{"base", map[string]resource.Quantity{"pods": resource.MustParse("1")}, v1.ResourceList{}},
{"hugepages", map[string]resource.Quantity{"hugepages-huge": resource.MustParse("2")}, v1.ResourceList{"hugepages-huge": resource.MustParse("2")}},
{"mixed", map[string]resource.Quantity{"pods": resource.MustParse("4"), "nvidia.com/gpu": resource.MustParse("5")}, v1.ResourceList{"nvidia.com/gpu": resource.MustParse("5")}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := GetPlaceholderResourceLimits(tt.resMap); !reflect.DeepEqual(got, tt.want) {
t.Errorf("GetPlaceholderResourceRequest() = %v, want %v", got, tt.want)
}
})
}
}

0 comments on commit 88f609b

Please sign in to comment.