From 8fa490f0c2ed588bbcc370060af1397dfb9acb49 Mon Sep 17 00:00:00 2001 From: Daniel Vega-Myhre Date: Tue, 13 Feb 2024 00:11:34 +0000 Subject: [PATCH] validate longest pod name for jobset will not exceed 63 chars --- api/jobset/v1alpha2/jobset_webhook.go | 16 +++++++++++-- api/jobset/v1alpha2/jobset_webhook_test.go | 27 ++++++++++++++++++++++ pkg/util/placement/placement.go | 8 +++---- pkg/webhooks/pod_admission_webhook.go | 5 ++-- pkg/webhooks/pod_admission_webhook_test.go | 2 +- 5 files changed, 49 insertions(+), 9 deletions(-) diff --git a/api/jobset/v1alpha2/jobset_webhook.go b/api/jobset/v1alpha2/jobset_webhook.go index c1605bb6..1523cd34 100644 --- a/api/jobset/v1alpha2/jobset_webhook.go +++ b/api/jobset/v1alpha2/jobset_webhook.go @@ -17,6 +17,7 @@ import ( "errors" "fmt" "math" + "strconv" apivalidation "k8s.io/apimachinery/pkg/api/validation" "k8s.io/apimachinery/pkg/runtime" @@ -105,10 +106,21 @@ func (js *JobSet) ValidateCreate() (admission.Warnings, error) { } // Check that the generated job names for this replicated job will be DNS 1035 compliant. // Use the largest job index as it will have the longest name. - testJobName := placement.GenJobName(js.Name, rjob.Name, int(rjob.Replicas-1)) - for _, errMessage := range validation.IsDNS1035Label(testJobName) { + longestJobName := placement.GenJobName(js.Name, rjob.Name, int(rjob.Replicas-1)) + for _, errMessage := range validation.IsDNS1035Label(longestJobName) { allErrs = append(allErrs, fmt.Errorf(errMessage)) } + // Check that the generated pod names for the replicated job is DNS 1035 compliant. + isIndexedJob := rjob.Template.Spec.CompletionMode != nil && *rjob.Template.Spec.CompletionMode == batchv1.IndexedCompletion + if isIndexedJob && rjob.Template.Spec.Completions != nil { + maxJobIndex := strconv.Itoa(int(rjob.Replicas - 1)) + maxPodIndex := strconv.Itoa(int(*rjob.Template.Spec.Completions - 1)) + // Add 5 char suffix to the deterministic part of the pod name to validate the full pod name is compliant. + longestPodName := placement.GenPodName(js.Name, rjob.Name, maxJobIndex, maxPodIndex) + "-abcde" + for _, errMessage := range validation.IsDNS1035Label(longestPodName) { + allErrs = append(allErrs, fmt.Errorf(errMessage)) + } + } } for _, rjobName := range js.Spec.SuccessPolicy.TargetReplicatedJobs { if !collections.Contains(validReplicatedJobs, rjobName) { diff --git a/api/jobset/v1alpha2/jobset_webhook_test.go b/api/jobset/v1alpha2/jobset_webhook_test.go index f5606a6f..25b8bca1 100644 --- a/api/jobset/v1alpha2/jobset_webhook_test.go +++ b/api/jobset/v1alpha2/jobset_webhook_test.go @@ -510,6 +510,33 @@ func TestValidateCreate(t *testing.T) { fmt.Errorf("must be no more than 63 characters"), ), }, + { + name: "jobset name will result in a pod name being too long", + js: &JobSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: strings.Repeat("a", 56), + }, + Spec: JobSetSpec{ + ReplicatedJobs: []ReplicatedJob{ + { + Name: "rj", + Replicas: 1, + Template: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + CompletionMode: ptr.To(batchv1.IndexedCompletion), + Completions: ptr.To(int32(1)), + Parallelism: ptr.To(int32(1)), + }, + }, + }, + }, + SuccessPolicy: &SuccessPolicy{}, + }, + }, + want: errors.Join( + fmt.Errorf("must be no more than 63 characters"), + ), + }, } for _, tc := range testCases { diff --git a/pkg/util/placement/placement.go b/pkg/util/placement/placement.go index bc5efe4d..93d75048 100644 --- a/pkg/util/placement/placement.go +++ b/pkg/util/placement/placement.go @@ -15,10 +15,10 @@ func GenJobName(jsName, rjobName string, jobIndex int) string { return fmt.Sprintf("%s-%s-%d", jsName, rjobName, jobIndex) } -// GenLeaderPodName returns the name of the leader pod (pod with completion index 0) -// for a given job in a jobset. -func GenLeaderPodName(jobSet, replicatedJob, jobIndex string) string { - return fmt.Sprintf("%s-%s-%s-0", jobSet, replicatedJob, jobIndex) +// GenPodName returns the pod name for the given JobSet name, ReplicatedJob name, +// Job index, and Pod index. +func GenPodName(jobSet, replicatedJob, jobIndex, podIndex string) string { + return fmt.Sprintf("%s-%s-%s-%s", jobSet, replicatedJob, jobIndex, podIndex) } // IsLeaderPod returns true if the given pod is a leader pod (job completion index of 0), diff --git a/pkg/webhooks/pod_admission_webhook.go b/pkg/webhooks/pod_admission_webhook.go index f5fb9de1..7eee0f76 100644 --- a/pkg/webhooks/pod_admission_webhook.go +++ b/pkg/webhooks/pod_admission_webhook.go @@ -107,7 +107,7 @@ func (p *podWebhook) leaderPodForFollower(ctx context.Context, pod *corev1.Pod) return leaderPod, nil } -// GenLeaderPodName accepts the name of a pod that is part of a jobset as input, and +// genLeaderPodName accepts the name of a pod that is part of a jobset as input, and // returns the name of the pod with completion index 0 in the same child job. func genLeaderPodName(pod *corev1.Pod) (string, error) { // Pod name format: ---- @@ -123,5 +123,6 @@ func genLeaderPodName(pod *corev1.Pod) (string, error) { if !ok { return "", fmt.Errorf("pod missing label: %s", jobset.JobIndexKey) } - return placement.GenLeaderPodName(jobSet, replicatedJob, jobIndex), nil + leaderPodName := placement.GenPodName(jobSet, replicatedJob, jobIndex, "0") + return leaderPodName, nil } diff --git a/pkg/webhooks/pod_admission_webhook_test.go b/pkg/webhooks/pod_admission_webhook_test.go index 85bc4e48..43802fa1 100644 --- a/pkg/webhooks/pod_admission_webhook_test.go +++ b/pkg/webhooks/pod_admission_webhook_test.go @@ -10,7 +10,7 @@ import ( jobset "sigs.k8s.io/jobset/api/jobset/v1alpha2" ) -func TestGenLeaderPodName(t *testing.T) { +func TestLeaderPodName(t *testing.T) { cases := []struct { desc string pod *corev1.Pod