Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate longest pod name for jobset will not exceed 63 chars #409

Merged
merged 1 commit into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions api/jobset/v1alpha2/jobset_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"errors"
"fmt"
"math"
"strconv"

apivalidation "k8s.io/apimachinery/pkg/api/validation"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -105,10 +106,21 @@ func (js *JobSet) ValidateCreate() (admission.Warnings, error) {
}
// Check that the generated job names for this replicated job will be DNS 1035 compliant.
// Use the largest job index as it will have the longest name.
testJobName := placement.GenJobName(js.Name, rjob.Name, int(rjob.Replicas-1))
for _, errMessage := range validation.IsDNS1035Label(testJobName) {
longestJobName := placement.GenJobName(js.Name, rjob.Name, int(rjob.Replicas-1))
for _, errMessage := range validation.IsDNS1035Label(longestJobName) {
allErrs = append(allErrs, fmt.Errorf(errMessage))
}
// Check that the generated pod names for the replicated job is DNS 1035 compliant.
isIndexedJob := rjob.Template.Spec.CompletionMode != nil && *rjob.Template.Spec.CompletionMode == batchv1.IndexedCompletion
if isIndexedJob && rjob.Template.Spec.Completions != nil {
maxJobIndex := strconv.Itoa(int(rjob.Replicas - 1))
maxPodIndex := strconv.Itoa(int(*rjob.Template.Spec.Completions - 1))
// Add 5 char suffix to the deterministic part of the pod name to validate the full pod name is compliant.
longestPodName := placement.GenPodName(js.Name, rjob.Name, maxJobIndex, maxPodIndex) + "-abcde"
for _, errMessage := range validation.IsDNS1035Label(longestPodName) {
allErrs = append(allErrs, fmt.Errorf(errMessage))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will the error message contain the -abcde? If so, I am afraid that will be confusing and perhaps better to have our own error message.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll test and confirm really quick

Copy link
Contributor Author

@danielvegamyhre danielvegamyhre Feb 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Error message looks good to me:

$ k apply -f examples/simple/test.yaml 
Error from server (Forbidden): error when creating "examples/simple/exclusive-placement.yaml": admission webhook "vjobset.kb.io" denied the request: must be no more than 63 characters

}
}
}
for _, rjobName := range js.Spec.SuccessPolicy.TargetReplicatedJobs {
if !collections.Contains(validReplicatedJobs, rjobName) {
Expand Down
27 changes: 27 additions & 0 deletions api/jobset/v1alpha2/jobset_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,33 @@ func TestValidateCreate(t *testing.T) {
fmt.Errorf("must be no more than 63 characters"),
),
},
{
name: "jobset name will result in a pod name being too long",
js: &JobSet{
ObjectMeta: metav1.ObjectMeta{
Name: strings.Repeat("a", 56),
},
Spec: JobSetSpec{
ReplicatedJobs: []ReplicatedJob{
{
Name: "rj",
Replicas: 1,
Template: batchv1.JobTemplateSpec{
Spec: batchv1.JobSpec{
CompletionMode: ptr.To(batchv1.IndexedCompletion),
Completions: ptr.To(int32(1)),
Parallelism: ptr.To(int32(1)),
},
},
},
},
SuccessPolicy: &SuccessPolicy{},
},
},
want: errors.Join(
fmt.Errorf("must be no more than 63 characters"),
),
},
}

for _, tc := range testCases {
Expand Down
8 changes: 4 additions & 4 deletions pkg/util/placement/placement.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ func GenJobName(jsName, rjobName string, jobIndex int) string {
return fmt.Sprintf("%s-%s-%d", jsName, rjobName, jobIndex)
}

// GenLeaderPodName returns the name of the leader pod (pod with completion index 0)
// for a given job in a jobset.
func GenLeaderPodName(jobSet, replicatedJob, jobIndex string) string {
return fmt.Sprintf("%s-%s-%s-0", jobSet, replicatedJob, jobIndex)
// GenPodName returns the pod name for the given JobSet name, ReplicatedJob name,
// Job index, and Pod index.
func GenPodName(jobSet, replicatedJob, jobIndex, podIndex string) string {
return fmt.Sprintf("%s-%s-%s-%s", jobSet, replicatedJob, jobIndex, podIndex)
}

// IsLeaderPod returns true if the given pod is a leader pod (job completion index of 0),
Expand Down
5 changes: 3 additions & 2 deletions pkg/webhooks/pod_admission_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func (p *podWebhook) leaderPodForFollower(ctx context.Context, pod *corev1.Pod)
return leaderPod, nil
}

// GenLeaderPodName accepts the name of a pod that is part of a jobset as input, and
// genLeaderPodName accepts the name of a pod that is part of a jobset as input, and
// returns the name of the pod with completion index 0 in the same child job.
func genLeaderPodName(pod *corev1.Pod) (string, error) {
// Pod name format: <jobset>-<replicatedJob>-<jobIndex>-<podIndex>-<randomSuffix>
Expand All @@ -123,5 +123,6 @@ func genLeaderPodName(pod *corev1.Pod) (string, error) {
if !ok {
return "", fmt.Errorf("pod missing label: %s", jobset.JobIndexKey)
}
return placement.GenLeaderPodName(jobSet, replicatedJob, jobIndex), nil
leaderPodName := placement.GenPodName(jobSet, replicatedJob, jobIndex, "0")
return leaderPodName, nil
}
2 changes: 1 addition & 1 deletion pkg/webhooks/pod_admission_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
jobset "sigs.k8s.io/jobset/api/jobset/v1alpha2"
)

func TestGenLeaderPodName(t *testing.T) {
func TestLeaderPodName(t *testing.T) {
cases := []struct {
desc string
pod *corev1.Pod
Expand Down